From: phil Date: Wed, 2 Jun 2004 15:06:23 +0000 (+0000) Subject: landing b_cmobd_merge on HEAD X-Git-Tag: v1_7_100~2269 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=2dc9c16e770415d56839e1996015fec5fab93f29 landing b_cmobd_merge on HEAD b_cmobd_merge should not be used for any further development or testing! --- diff --git a/ldiskfs/kernel_patches/patches/export-ext3-2.6-suse.patch b/ldiskfs/kernel_patches/patches/export-ext3-2.6-suse.patch new file mode 100644 index 0000000..638b4bf --- /dev/null +++ b/ldiskfs/kernel_patches/patches/export-ext3-2.6-suse.patch @@ -0,0 +1,33 @@ +Index: linux-stage/fs/ext3/super.c +=================================================================== +--- linux-stage.orig/fs/ext3/super.c 2004-04-02 16:40:18.000000000 -0500 ++++ linux-stage/fs/ext3/super.c 2004-04-02 16:40:18.000000000 -0500 +@@ -115,6 +115,8 @@ + handle->h_err = err; + } + ++EXPORT_SYMBOL(ext3_journal_abort_handle); ++ + static char error_buf[1024]; + + /* Deal with the reporting of failure conditions on a filesystem such as +@@ -1774,6 +1776,8 @@ + return ret; + } + ++EXPORT_SYMBOL(ext3_force_commit); ++ + /* + * Ext3 always journals updates to the superblock itself, so we don't + * have to propagate any other updates to the superblock on disk at this +@@ -2088,6 +2092,10 @@ + unsigned long *blocks, int *created, int create); + EXPORT_SYMBOL(ext3_map_inode_page); + ++EXPORT_SYMBOL(ext3_xattr_get); ++EXPORT_SYMBOL(ext3_xattr_set_handle); ++EXPORT_SYMBOL(ext3_bread); ++ + MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); + MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); + MODULE_LICENSE("GPL"); diff --git a/ldiskfs/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch b/ldiskfs/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch new file mode 100644 index 0000000..2327263 --- /dev/null +++ 
b/ldiskfs/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch @@ -0,0 +1,17 @@ +Index: linux-2.6.0/include/linux/ext3_fs_sb.h +=================================================================== +--- linux-2.6.0.orig/include/linux/ext3_fs_sb.h 2003-06-24 18:05:26.000000000 +0400 ++++ linux-2.6.0/include/linux/ext3_fs_sb.h 2003-12-31 01:09:26.000000000 +0300 +@@ -19,9 +19,12 @@ + #ifdef __KERNEL__ + #include + #include ++#ifndef EXT_INCLUDE ++#define EXT_INCLUDE + #include + #include + #endif ++#endif + + /* + * third extended-fs super-block data in memory diff --git a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.6.0.patch b/ldiskfs/kernel_patches/patches/ext3-ea-in-inode-2.6-suse.patch similarity index 98% rename from lustre/kernel_patches/patches/ext3-ea-in-inode-2.6.0.patch rename to ldiskfs/kernel_patches/patches/ext3-ea-in-inode-2.6-suse.patch index 5e755d9..39fa9ce 100644 --- a/lustre/kernel_patches/patches/ext3-ea-in-inode-2.6.0.patch +++ b/ldiskfs/kernel_patches/patches/ext3-ea-in-inode-2.6-suse.patch @@ -189,7 +189,7 @@ Index: linux-2.6.0/fs/ext3/xattr.c + /* try to find attribute in inode body */ + err = ext3_xattr_ibody_get(inode, name_index, name, + buffer, buffer_size); -+ if (err < 0) ++ if (err < 0) + /* search was unsuccessful, try to find EA in dedicated block */ + err = ext3_xattr_block_get(inode, name_index, name, + buffer, buffer_size); @@ -252,7 +252,7 @@ Index: linux-2.6.0/fs/ext3/xattr.c + return 0; + + ret = ext3_get_inode_loc(inode, &iloc); -+ if (ret) ++ if (ret) + return ret; + raw_inode = ext3_raw_inode(&iloc); + @@ -334,7 +334,7 @@ Index: linux-2.6.0/fs/ext3/xattr.c + /* get list of attributes stored in inode body */ + error = ext3_xattr_ibody_list(inode, buffer, buffer_size); + if (error < 0) { -+ /* some error occured while collecting ++ /* some error occured while collecting + * attributes in inode body */ + size = 0; + goto cleanup; @@ -347,12 +347,12 @@ Index: linux-2.6.0/fs/ext3/xattr.c + if (buffer_size <= 0) { + buffer 
= NULL; + buffer_size = 0; -+ } else ++ } else + buffer += error; + } + + error = ext3_xattr_block_list(inode, buffer, buffer_size); -+ if (error < 0) ++ if (error < 0) + /* listing was successful, so we return len */ + size = 0; + @@ -374,7 +374,7 @@ Index: linux-2.6.0/fs/ext3/xattr.c + * NOTE: free space includes space our attribute hold + */ +int -+ext3_xattr_ibody_find(struct inode *inode, int name_index, ++ext3_xattr_ibody_find(struct inode *inode, int name_index, + const char *name, struct ext3_xattr_entry *rentry, int *free) +{ + struct ext3_xattr_entry *last; @@ -389,7 +389,7 @@ Index: linux-2.6.0/fs/ext3/xattr.c + return ret; + + err = ext3_get_inode_loc(inode, &iloc); -+ if (err) ++ if (err) + return -EIO; + raw_inode = ext3_raw_inode(&iloc); + @@ -498,7 +498,7 @@ Index: linux-2.6.0/fs/ext3/xattr.c + +/* + * ext3_xattr_inode_set() -+ * ++ * + * this routine add/remove/replace attribute in inode body + */ +int @@ -558,7 +558,7 @@ Index: linux-2.6.0/fs/ext3/xattr.c + } + if (name_index == last->e_name_index && + name_len == last->e_name_len && -+ !memcmp(name, last->e_name, name_len)) ++ !memcmp(name, last->e_name, name_len)) + here = last; + else { + /* we calculate all but our attribute @@ -595,9 +595,9 @@ Index: linux-2.6.0/fs/ext3/xattr.c + e = (struct ext3_xattr_entry *) start; + while (!IS_LAST_ENTRY(e)) { + struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(e); -+ int offs = le16_to_cpu(e->e_value_offs); -+ if (offs < border) -+ e->e_value_offs = ++ int offs = le16_to_cpu(e->e_value_offs); ++ if (offs < border) ++ e->e_value_offs = + cpu_to_le16(offs + size); + e = next; + } @@ -741,7 +741,7 @@ Index: linux-2.6.0/fs/ext3/xattr.c + +/* + * ext3_xattr_block_set() -+ * ++ * + * this routine add/remove/replace attribute in EA block + */ +int @@ -815,7 +815,7 @@ Index: linux-2.6.0/include/linux/ext3_fs_i.h */ loff_t i_disksize; -+ /* on-disk additional lenght */ ++ /* on-disk additional length */ + __u16 i_extra_isize; + /* diff --git 
a/ldiskfs/kernel_patches/patches/ext3-include-fixes-2.6-suse.patch b/ldiskfs/kernel_patches/patches/ext3-include-fixes-2.6-suse.patch new file mode 100644 index 0000000..28e3587 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext3-include-fixes-2.6-suse.patch @@ -0,0 +1,20 @@ +Index: linux-stage/include/linux/ext3_fs.h +=================================================================== +--- linux-stage.orig/include/linux/ext3_fs.h 2004-04-02 16:43:37.000000000 -0500 ++++ linux-stage/include/linux/ext3_fs.h 2004-04-02 16:43:37.000000000 -0500 +@@ -331,12 +331,13 @@ + #define EXT3_MOUNT_IOPEN_NOPRIV 0x20000 /* Make iopen world-readable */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ +-#ifndef _LINUX_EXT2_FS_H ++#ifndef clear_opt + #define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt + #define set_opt(o, opt) o |= EXT3_MOUNT_##opt + #define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \ + EXT3_MOUNT_##opt) +-#else ++#endif ++#ifndef EXT2_MOUNT_NOLOAD + #define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD + #define EXT2_MOUNT_ABORT EXT3_MOUNT_ABORT + #define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS diff --git a/lustre/kernel_patches/patches/ext3-map_inode_page-2.6.0.patch b/ldiskfs/kernel_patches/patches/ext3-map_inode_page-2.6-suse.patch similarity index 100% rename from lustre/kernel_patches/patches/ext3-map_inode_page-2.6.0.patch rename to ldiskfs/kernel_patches/patches/ext3-map_inode_page-2.6-suse.patch diff --git a/lustre/kernel_patches/patches/ext3-san-jdike-2.5.73.patch b/ldiskfs/kernel_patches/patches/ext3-san-jdike-2.6-suse.patch similarity index 100% rename from lustre/kernel_patches/patches/ext3-san-jdike-2.5.73.patch rename to ldiskfs/kernel_patches/patches/ext3-san-jdike-2.6-suse.patch diff --git a/lustre/kernel_patches/patches/ext3-wantedi-2.6.3.patch b/ldiskfs/kernel_patches/patches/ext3-wantedi-2.6-suse.patch similarity index 91% rename from lustre/kernel_patches/patches/ext3-wantedi-2.6.3.patch rename to 
ldiskfs/kernel_patches/patches/ext3-wantedi-2.6-suse.patch index 3f20da7..a4867a5 100644 --- a/lustre/kernel_patches/patches/ext3-wantedi-2.6.3.patch +++ b/ldiskfs/kernel_patches/patches/ext3-wantedi-2.6-suse.patch @@ -154,22 +154,6 @@ Index: uml-2.6.3/fs/ext3/namei.c err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; -Index: uml-2.6.3/include/linux/dcache.h -=================================================================== ---- uml-2.6.3.orig/include/linux/dcache.h 2004-02-21 00:19:14.365988600 +0800 -+++ uml-2.6.3/include/linux/dcache.h 2004-02-21 00:21:04.612228624 +0800 -@@ -25,6 +25,11 @@ - - #define IS_ROOT(x) ((x) == (x)->d_parent) - -+struct dentry_params { -+ unsigned long p_inum; -+ void *p_ptr; -+}; -+ - /* - * "quick string" -- eases parameter passing, but more importantly - * saves "metadata" about the string (ie length and the hash). Index: uml-2.6.3/include/linux/ext3_fs.h =================================================================== --- uml-2.6.3.orig/include/linux/ext3_fs.h 2004-01-09 14:59:44.000000000 +0800 diff --git a/ldiskfs/kernel_patches/patches/iopen-2.6-suse.patch b/ldiskfs/kernel_patches/patches/iopen-2.6-suse.patch new file mode 100644 index 0000000..2133355 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/iopen-2.6-suse.patch @@ -0,0 +1,475 @@ + fs/ext3/inode.c | 3 + fs/ext3/iopen.c | 239 +++++++++++++++++++++++++++++++++++++ + fs/ext3/iopen.h | 15 ++ + fs/ext3/namei.c | 13 ++ + fs/ext3/super.c | 17 ++ + include/linux/ext3_fs.h | 2 + 7 files changed, 304 insertions(+), 1 deletion(-) + +Index: linux-stage/fs/ext3/Makefile +=================================================================== +--- linux-stage.orig/fs/ext3/Makefile 2004-05-07 16:00:16.000000000 -0400 ++++ linux-stage/fs/ext3/Makefile 2004-05-07 16:00:17.000000000 -0400 +@@ -4,7 +4,7 @@ + + obj-$(CONFIG_EXT3_FS) += ext3.o + +-ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ++ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o 
iopen.o \ + ioctl.o namei.o super.o symlink.o hash.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o +Index: linux-stage/fs/ext3/inode.c +=================================================================== +--- linux-stage.orig/fs/ext3/inode.c 2004-05-07 16:00:16.000000000 -0400 ++++ linux-stage/fs/ext3/inode.c 2004-05-07 17:21:59.000000000 -0400 +@@ -37,6 +37,7 @@ + #include + #include + #include "xattr.h" ++#include "iopen.h" + #include "acl.h" + + /* +@@ -2472,6 +2473,9 @@ + ei->i_acl = EXT3_ACL_NOT_CACHED; + ei->i_default_acl = EXT3_ACL_NOT_CACHED; + #endif ++ if (ext3_iopen_get_inode(inode)) ++ return; ++ + if (ext3_get_inode_loc(inode, &iloc, 0)) + goto bad_inode; + bh = iloc.bh; +Index: linux-stage/fs/ext3/iopen.c +=================================================================== +--- linux-stage.orig/fs/ext3/iopen.c 2004-05-07 16:00:17.000000000 -0400 ++++ linux-stage/fs/ext3/iopen.c 2004-05-07 17:22:37.000000000 -0400 +@@ -0,0 +1,272 @@ ++/* ++ * linux/fs/ext3/iopen.c ++ * ++ * Special support for open by inode number ++ * ++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). ++ * ++ * This file may be redistributed under the terms of the GNU General ++ * Public License. ++ * ++ * ++ * Invariants: ++ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias ++ * for an inode at one time. ++ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry ++ * aliases on an inode at the same time. ++ * ++ * If we have any connected dentry aliases for an inode, use one of those ++ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED ++ * dentry for this inode, which thereafter will be found by the dcache ++ * when looking up this inode number in __iopen__, so we don't return here ++ * until it is gone. ++ * ++ * If we get an inode via a regular name lookup, then we "rename" the ++ * NFSD_DISCONNECTED dentry to the proper name and parent. 
This ensures ++ * existing users of the disconnected dentry will continue to use the same ++ * dentry as the connected users, and there will never be both kinds of ++ * dentry aliases at one time. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "iopen.h" ++ ++#ifndef assert ++#define assert(test) J_ASSERT(test) ++#endif ++ ++#define IOPEN_NAME_LEN 32 ++ ++/* ++ * This implements looking up an inode by number. ++ */ ++static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct inode *inode; ++ unsigned long ino; ++ struct list_head *lp; ++ struct dentry *alternate; ++ char buf[IOPEN_NAME_LEN]; ++ ++ if (dentry->d_name.len >= IOPEN_NAME_LEN) ++ return ERR_PTR(-ENAMETOOLONG); ++ ++ memcpy(buf, dentry->d_name.name, dentry->d_name.len); ++ buf[dentry->d_name.len] = 0; ++ ++ if (strcmp(buf, ".") == 0) ++ ino = dir->i_ino; ++ else if (strcmp(buf, "..") == 0) ++ ino = EXT3_ROOT_INO; ++ else ++ ino = simple_strtoul(buf, 0, 0); ++ ++ if ((ino != EXT3_ROOT_INO && ++ //ino != EXT3_ACL_IDX_INO && ++ //ino != EXT3_ACL_DATA_INO && ++ ino < EXT3_FIRST_INO(dir->i_sb)) || ++ ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) ++ return ERR_PTR(-ENOENT); ++ ++ inode = iget(dir->i_sb, ino); ++ if (!inode) ++ return ERR_PTR(-EACCES); ++ if (is_bad_inode(inode)) { ++ iput(inode); ++ return ERR_PTR(-ENOENT); ++ } ++ ++ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ ++ assert(d_unhashed(dentry)); /* d_rehash */ ++ ++ /* preferably return a connected dentry */ ++ spin_lock(&dcache_lock); ++ list_for_each(lp, &inode->i_dentry) { ++ alternate = list_entry(lp, struct dentry, d_alias); ++ assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); ++ } ++ ++ if (!list_empty(&inode->i_dentry)) { ++ alternate = list_entry(inode->i_dentry.next, ++ struct dentry, d_alias); ++ dget_locked(alternate); ++ alternate->d_vfs_flags |= DCACHE_REFERENCED; ++ iput(inode); ++
spin_unlock(&dcache_lock); ++ return alternate; ++ } ++ dentry->d_flags |= DCACHE_DISCONNECTED; ++ ++ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ ++ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ ++ dentry->d_inode = inode; ++ ++ __d_rehash(dentry, 0); /* d_rehash */ ++ spin_unlock(&dcache_lock); ++ ++ return NULL; ++} ++ ++#define do_switch(x,y) do { \ ++ __typeof__ (x) __tmp = x; \ ++ x = y; y = __tmp; } while (0) ++ ++static inline void switch_names(struct dentry *dentry, struct dentry *target) ++{ ++ const unsigned char *old_name, *new_name; ++ ++ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); ++ old_name = target->d_name.name; ++ new_name = dentry->d_name.name; ++ if (old_name == target->d_iname) ++ old_name = dentry->d_iname; ++ if (new_name == dentry->d_iname) ++ new_name = target->d_iname; ++ target->d_name.name = new_name; ++ dentry->d_name.name = old_name; ++} ++ ++/* This function is spliced into ext3_lookup and does the move of a ++ * disconnected dentry (if it exists) to a connected dentry. 
++ */ ++struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, ++ int rehash) ++{ ++ struct dentry *tmp, *goal = NULL; ++ struct list_head *lp; ++ ++ /* verify this dentry is really new */ ++ assert(dentry->d_inode == NULL); ++ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ ++ if (rehash) ++ assert(d_unhashed(dentry)); /* d_rehash */ ++ assert(list_empty(&dentry->d_subdirs)); ++ ++ spin_lock(&dcache_lock); ++ if (!inode) ++ goto do_rehash; ++ ++ /* preferably return a connected dentry */ ++ list_for_each(lp, &inode->i_dentry) { ++ tmp = list_entry(lp, struct dentry, d_alias); ++ if (tmp->d_flags & DCACHE_DISCONNECTED) { ++ assert(tmp->d_alias.next == &inode->i_dentry); ++ assert(tmp->d_alias.prev == &inode->i_dentry); ++ goal = tmp; ++ dget_locked(goal); ++ break; ++ } ++ } ++ ++ if (!goal) ++ goto do_instantiate; ++ ++ /* Move the goal to the de hash queue */ ++ goal->d_flags &= ~ DCACHE_DISCONNECTED; ++ security_d_instantiate(goal, inode); ++ __d_rehash(dentry, 0); ++ __d_move(goal, dentry); ++ spin_unlock(&dcache_lock); ++ iput(inode); ++ ++ return goal; ++ ++ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ ++do_instantiate: ++ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ ++ dentry->d_inode = inode; ++do_rehash: ++ if (rehash) ++ __d_rehash(dentry, 0); /* d_rehash */ ++ spin_unlock(&dcache_lock); ++ ++ return NULL; ++} ++ ++/* ++ * These are the special structures for the iopen pseudo directory.
++ */ ++ ++static struct inode_operations iopen_inode_operations = { ++ lookup: iopen_lookup, /* BKL held */ ++}; ++ ++static struct file_operations iopen_file_operations = { ++ read: generic_read_dir, ++}; ++ ++static int match_dentry(struct dentry *dentry, const char *name) ++{ ++ int len; ++ ++ len = strlen(name); ++ if (dentry->d_name.len != len) ++ return 0; ++ if (strncmp(dentry->d_name.name, name, len)) ++ return 0; ++ return 1; ++} ++ ++/* ++ * This function is spliced into ext3_lookup and returns 1 if the file ++ * name is __iopen__ and dentry has been filled in appropriately. ++ */ ++int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) ++{ ++ struct inode *inode; ++ ++ if (dir->i_ino != EXT3_ROOT_INO || ++ !test_opt(dir->i_sb, IOPEN) || ++ !match_dentry(dentry, "__iopen__")) ++ return 0; ++ ++ inode = iget(dir->i_sb, EXT3_BAD_INO); ++ ++ if (!inode) ++ return 0; ++ d_add(dentry, inode); ++ return 1; ++} ++ ++/* ++ * This function is spliced into read_inode; it returns 1 if inode ++ * number is the one for /__iopen__, in which case the inode is filled ++ * in appropriately. Otherwise, this function returns 0.
++ */ ++int ext3_iopen_get_inode(struct inode *inode) ++{ ++ if (inode->i_ino != EXT3_BAD_INO) ++ return 0; ++ ++ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; ++ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) ++ inode->i_mode |= 0777; ++ inode->i_uid = 0; ++ inode->i_gid = 0; ++ inode->i_nlink = 1; ++ inode->i_size = 4096; ++ inode->i_atime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME; ++ inode->i_mtime = CURRENT_TIME; ++ EXT3_I(inode)->i_dtime = 0; ++ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size ++ * (for stat), not the fs block ++ * size */ ++ inode->i_blocks = 0; ++ inode->i_version = 1; ++ inode->i_generation = 0; ++ ++ inode->i_op = &iopen_inode_operations; ++ inode->i_fop = &iopen_file_operations; ++ inode->i_mapping->a_ops = 0; ++ ++ return 1; ++} +Index: linux-stage/fs/ext3/iopen.h +=================================================================== +--- linux-stage.orig/fs/ext3/iopen.h 2004-05-07 16:00:17.000000000 -0400 ++++ linux-stage/fs/ext3/iopen.h 2004-05-07 16:00:17.000000000 -0400 +@@ -0,0 +1,15 @@ ++/* ++ * iopen.h ++ * ++ * Special support for opening files by inode number. ++ * ++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). ++ * ++ * This file may be redistributed under the terms of the GNU General ++ * Public License. 
++ */ ++ ++extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); ++extern int ext3_iopen_get_inode(struct inode *inode); ++extern struct dentry *iopen_connect_dentry(struct dentry *dentry, ++ struct inode *inode, int rehash); +Index: linux-stage/fs/ext3/namei.c +=================================================================== +--- linux-stage.orig/fs/ext3/namei.c 2004-05-07 16:00:16.000000000 -0400 ++++ linux-stage/fs/ext3/namei.c 2004-05-07 16:00:17.000000000 -0400 +@@ -37,6 +37,7 @@ + #include + #include + #include "xattr.h" ++#include "iopen.h" + #include "acl.h" + + /* +@@ -979,6 +980,9 @@ + if (dentry->d_name.len > EXT3_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + ++ if (ext3_check_for_iopen(dir, dentry)) ++ return NULL; ++ + bh = ext3_find_entry(dentry, &de); + inode = NULL; + if (bh) { +@@ -989,10 +993,8 @@ + if (!inode) + return ERR_PTR(-EACCES); + } +- if (inode) +- return d_splice_alias(inode, dentry); +- d_add(dentry, inode); +- return NULL; ++ ++ return iopen_connect_dentry(dentry, inode, 1); + } + + +@@ -2019,10 +2021,6 @@ + inode->i_nlink); + inode->i_version++; + inode->i_nlink = 0; +- /* There's no need to set i_disksize: the fact that i_nlink is +- * zero will ensure that the right thing happens during any +- * recovery. 
*/ +- inode->i_size = 0; + ext3_orphan_add(handle, inode); + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + ext3_mark_inode_dirty(handle, inode); +@@ -2139,6 +2137,23 @@ + return err; + } + ++/* Like ext3_add_nondir() except for call to iopen_connect_dentry */ ++static int ext3_add_link(handle_t *handle, struct dentry *dentry, ++ struct inode *inode) ++{ ++ int err = ext3_add_entry(handle, dentry, inode); ++ if (!err) { ++ err = ext3_mark_inode_dirty(handle, inode); ++ if (err == 0) { ++ (void)iopen_connect_dentry(dentry, inode, 0); ++ return 0; ++ } ++ } ++ ext3_dec_count(handle, inode); ++ iput(inode); ++ return err; ++} ++ + static int ext3_link (struct dentry * old_dentry, + struct inode * dir, struct dentry *dentry) + { +@@ -2161,7 +2176,8 @@ + ext3_inc_count(handle, inode); + atomic_inc(&inode->i_count); + +- err = ext3_add_nondir(handle, dentry, inode); ++ err = ext3_add_link(handle, dentry, inode); ++ ext3_orphan_del(handle,inode); + ext3_journal_stop(handle); + return err; + } +Index: linux-stage/fs/ext3/super.c +=================================================================== +--- linux-stage.orig/fs/ext3/super.c 2004-05-07 16:00:16.000000000 -0400 ++++ linux-stage/fs/ext3/super.c 2004-05-07 17:21:59.000000000 -0400 +@@ -536,7 +536,7 @@ + Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload, + Opt_commit, Opt_journal_update, Opt_journal_inum, + Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, +- Opt_ignore, Opt_err, ++ Opt_ignore, Opt_err, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, + }; + + static match_table_t tokens = { +@@ -575,6 +575,9 @@ + {Opt_ignore, "noquota"}, + {Opt_ignore, "quota"}, + {Opt_ignore, "usrquota"}, ++ {Opt_iopen, "iopen"}, ++ {Opt_noiopen, "noiopen"}, ++ {Opt_iopen_nopriv, "iopen_nopriv"}, + {Opt_err, NULL} + }; + +@@ -762,6 +765,18 @@ + case Opt_abort: + set_opt(sbi->s_mount_opt, ABORT); + break; ++ case Opt_iopen: ++ set_opt (sbi->s_mount_opt, IOPEN); ++ clear_opt (sbi->s_mount_opt, 
IOPEN_NOPRIV); ++ break; ++ case Opt_noiopen: ++ clear_opt (sbi->s_mount_opt, IOPEN); ++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ break; ++ case Opt_iopen_nopriv: ++ set_opt (sbi->s_mount_opt, IOPEN); ++ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ break; + case Opt_ignore: + break; + default: +Index: linux-stage/include/linux/ext3_fs.h +=================================================================== +--- linux-stage.orig/include/linux/ext3_fs.h 2004-05-07 16:00:16.000000000 -0400 ++++ linux-stage/include/linux/ext3_fs.h 2004-05-07 16:00:17.000000000 -0400 +@@ -325,6 +325,8 @@ + #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ + #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ + #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ ++#define EXT3_MOUNT_IOPEN 0x10000 /* Allow access via iopen */ ++#define EXT3_MOUNT_IOPEN_NOPRIV 0x20000 /* Make iopen world-readable */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef _LINUX_EXT2_FS_H diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-suse.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-suse.series new file mode 100644 index 0000000..cff99dd --- /dev/null +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-suse.series @@ -0,0 +1,9 @@ +ext3-wantedi-2.6-suse.patch +ext3-san-jdike-2.6-suse.patch +iopen-2.6-suse.patch +export_symbols-ext3-2.6-suse.patch +ext3-map_inode_page-2.6-suse.patch +ext3-init-generation-2.6-suse.patch +ext3-ea-in-inode-2.6-suse.patch +export-ext3-2.6-suse.patch +ext3-include-fixes-2.6-suse.patch diff --git a/ldiskfs/ldiskfs/Makefile.in b/ldiskfs/ldiskfs/Makefile.in new file mode 100644 index 0000000..7d1e229 --- /dev/null +++ b/ldiskfs/ldiskfs/Makefile.in @@ -0,0 +1,19 @@ +default: all + +MODULES := ldiskfs + +# copy makefile over to not break patches +ext3_extra := $(wildcard @LINUX@/fs/ext3/Makefile) + +ext3_headers := $(wildcard @LINUX@/fs/ext3/*.h) +linux_headers := $(wildcard 
@LINUX@/include/linux/ext3*.h) + +ext3_sources := $(filter-out %.mod.c,$(wildcard @LINUX@/fs/ext3/*.c)) +new_sources := iopen.c iopen.h +ldiskfs_sources := $(notdir $(ext3_sources) $(ext3_headers)) $(new_sources) + +ldiskfs-objs := $(filter %.o,$(ldiskfs_sources:.c=.o)) + +EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LUSTRE@ -I@LUSTRE@/ldiskfs + +@INCLUDE_RULES@ diff --git a/ldiskfs/ldiskfs/autoMakefile.am b/ldiskfs/ldiskfs/autoMakefile.am new file mode 100644 index 0000000..b24081e --- /dev/null +++ b/ldiskfs/ldiskfs/autoMakefile.am @@ -0,0 +1,64 @@ +if LDISKFS +modulefs_DATA = ldiskfs$(KMODEXT) +endif + +ldiskfs_linux_headers := $(addprefix linux/,$(subst ext3,ldiskfs,$(notdir $(linux_headers)))) + +$(filter %.c,$(ldiskfs_sources)): sources $(ldiskfs_linux_headers) $(filter %.h,$(ldiskfs_sources)) + +ldiskfs_sed_flags = \ + -e "s/dx_hash_info/ext3_dx_hash_info/g" \ + -e "s/dir_private_info/ext3_dir_private_info/g" \ + -e "s/DX_HASH/EXT3_DX_HASH/g" \ + -e "s/EXT3/LDISKFS/g" -e "s/ext3/ldiskfs/g" + +%.c: linux-stage/fs/ext3/%.c + sed $(strip $(ldiskfs_sed_flags)) $< > $@ + +%.h: linux-stage/fs/ext3/%.h + sed $(strip $(ldiskfs_sed_flags)) $< > $@ + +linux/ldiskfs%.h: linux-stage/include/linux/ext3%.h + sed $(strip $(ldiskfs_sed_flags)) $< > $@ + +# +# FIXME: we need to grab the series in configure somehow +# (see bug 1679) +# +series := @top_srcdir@/kernel_patches/series/ldiskfs-2.6-suse.series + +sources: $(ext3_sources) $(ext3_headers) $(linux_headers) $(series) + rm -rf linux-stage linux sources $(ldiskfs_SOURCES) + mkdir -p linux-stage/fs/ext3 linux-stage/include/linux + cd linux-stage && quilt setup -l ../$(series) + cp $(ext3_sources) $(ext3_headers) $(ext3_extra) linux-stage/fs/ext3 + cp $(linux_headers) linux-stage/include/linux + cd linux-stage && quilt push -a -q + mkdir linux + @echo -n "Replacing 'ext3' with 'ldiskfs':" + @for i in $(notdir $(ext3_headers) $(ext3_sources)) $(new_sources) ; do \ + echo -n " $$i" ; \ + sed $(strip $(ldiskfs_sed_flags)) \ + 
linux-stage/fs/ext3/$$i > $$i ; \ + done + @for i in $(subst ext3,,$(notdir $(linux_headers))) ; do \ + echo -n " ext3$$i" ; \ + sed $(strip $(ldiskfs_sed_flags)) \ + linux-stage/include/linux/ext3$$i \ + > linux/ldiskfs$$i ; \ + done + touch sources + +foo-check: + @echo "ldiskfs_sources: $(ldiskfs_sources)" + @echo "ldiskfs_SOURCES: $(ldiskfs_SOURCES)" + @echo "ldiskfs_headers: $(ldiskfs_headers)" + @echo "ldiskfs_objects: $(ldiskfs_objects)" + @echo "ldiskfs_OBJECTS: $(ldiskfs_OBJECTS)" + @echo "ldiskfs_LDADD: $(ldiskfs_LDADD)" + +MOSTLYCLEANFILES = *.o *.ko *.mod.c +CLEANFILES = sources *.c *.h + +clean: clean-am + rm -rf linux linux-stage diff --git a/lnet/.cvsignore b/lnet/.cvsignore index c1a9bdf..f30d862 100644 --- a/lnet/.cvsignore +++ b/lnet/.cvsignore @@ -1,9 +1,11 @@ Kernelenv Makefile -Makefile.in +autoMakefile +autoMakefile.in aclocal.m4 autom4te.cache config.log config.status configure -.*.o.cmd +.*.cmd +.depend diff --git a/lnet/Makefile.am b/lnet/Makefile.am deleted file mode 100644 index 0083ac1..0000000 --- a/lnet/Makefile.am +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -EXTRA_DIST = Rules.linux archdep.m4 include -DIST_SUBDIRS = libcfs portals knals unals utils tests doc router - -if LIBLUSTRE -SUBDIRS = portals unals utils -else - -if CRAY_PORTALS -SUBDIRS = libcfs tests doc -else -SUBDIRS = libcfs portals knals unals utils tests doc router -endif - -endif diff --git a/lnet/Makefile.in b/lnet/Makefile.in new file mode 100644 index 0000000..71d0dc8 --- /dev/null +++ b/lnet/Makefile.in @@ -0,0 +1,9 @@ +subdir-m += libcfs + +cray-subdirs += portals +cray-subdirs += knals +cray-subdirs += router +cray-subdirs += tests +@CRAY_PORTALS_FALSE@subdir-m += $(cray-subdirs) + +@INCLUDE_RULES@ diff --git a/lnet/Rules.linux b/lnet/Rules.linux deleted file mode 100644 index 232a248..0000000 --- a/lnet/Rules.linux +++ /dev/null @@ -1,29 +0,0 @@ -# included in Linux kernel directories -# Rules for module building - -if LINUX25 - -basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g' | sed -e 's/^.*\///g') -AM_CPPFLAGS= -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -mpreferred-stack-boundary=2 -DKBUILD_MODNAME=$(MODULE) -DKBUILD_BASENAME=$(basename) - -$(MODULE).o: $($(MODULE)_OBJECTS) $($(MODULE)_DEPENDENCIES) - $(LD) -m $(MOD_LINK) -r -o $(MODULE)_tmp.o $($(MODULE)_OBJECTS) - rm -f $(MODULE)_tmp.c - $(LINUX)/scripts/modpost $(LINUX)/vmlinux $(MODULE)_tmp.o - $(COMPILE) -UKBUILD_BASENAME -DKBUILD_BASENAME=$(MODULE) -c $(MODULE)_tmp.mod.c - $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $(MODULE)_tmp.o $(MODULE)_tmp.mod.o - -else - -$(MODULE).o: $($(MODULE)_OBJECTS) - $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $($(MODULE)_OBJECTS) - -endif - -tags: - rm -f $(top_srcdir)/TAGS - rm -f $(top_srcdir)/tags - find $(top_srcdir)/../portals/ -name '*.[hc]' | xargs etags -a - find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs etags -a - find $(top_srcdir)/../portals/ -name '*.[hc]' | xargs ctags -a - find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | 
xargs ctags -a diff --git a/lnet/archdep.m4 b/lnet/archdep.m4 index b203a95..636ee1d 100644 --- a/lnet/archdep.m4 +++ b/lnet/archdep.m4 @@ -1,25 +1,55 @@ +# -------- we can't build modules unless srcdir = builddir +if test x$enable_modules != xno ; then +AC_CHECK_FILE([autoMakefile.am],[], + [AC_MSG_ERROR([At this time, Lustre does not support building kernel modules with srcdir != buildir.])]) +fi # -------- in kernel compilation? (2.5 only) ------------- -AC_ARG_ENABLE(inkernel, [ --enable-inkernel set up 2.5 kernel makefiles]) +AC_MSG_CHECKING([if inkernel build support is requested]) +AC_ARG_ENABLE([inkernel], + AC_HELP_STRING([--enable-inkernel], + [set up 2.5 kernel makefiles]), + [],[enable_inkernel=no]) +AC_MSG_RESULT([$enable_inkernel]) AM_CONDITIONAL(INKERNEL, test x$enable_inkernel = xyes) -echo "Makefile for in kernel build: $INKERNEL" # -------- are we building against an external portals? ------- -# haha, I wonder how one is really supposed to do this -# automake seems to have a DEFS variable which looks good -AC_ARG_WITH(cray-portals, [ --with-cray-portals=[path] path to cray portals], - CRAY_PORTALS_INCLUDE="-I$with_cray_portals" - CC="$CC -DCRAY_PORTALS=1" - ) -AC_SUBST(CRAY_PORTALS_INCLUDE) -AM_CONDITIONAL(CRAY_PORTALS, test ! 
"x$with_cray_portals" = x) - -# -------- liblustre compilation -------------- -AC_ARG_WITH(lib, [ --with-lib compile lustre library], host_cpu="lib") +AC_MSG_CHECKING([if Cray portals should be used]) +AC_ARG_WITH([cray-portals], + AC_HELP_STRING([--with-cray-portals=path], + [path to cray portals]), + [ + if test "$with_cray_portals" != no; then + if test -r $with_cray_portals/include/portals/api.h ; then + CRAY_PORTALS_INCLUDE="-I$with_cray_portals/include" + AC_DEFINE(CRAY_PORTALS, 1, [Building with Cray Portals]) + else + AC_MSG_ERROR([--with-cray-portals specified badly]) + fi + fi + ],[with_cray_portals=no]) +AC_MSG_RESULT([$with_cray_portals]) + +AM_CONDITIONAL(CRAY_PORTALS, test x$with_cray_portals != xno) + +# -------- enable tests and utils? ------- +if test x$enable_tests = xno ; then + AC_MSG_NOTICE([disabling tests]) + enable_tests=no +fi +if test x$enable_utils = xno ; then + AC_MSG_NOTICE([disabling utilities]) + enable_utils=no +fi # -------- set linuxdir ------------ - -AC_ARG_WITH(linux, [ --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux) +AC_MSG_CHECKING([for Linux sources]) +AC_ARG_WITH([linux], + AC_HELP_STRING([--with-linux=path], + [set path to Linux source (default=/usr/src/linux)]), + [LINUX=$with_linux], + [LINUX=/usr/src/linux]) +AC_MSG_RESULT([$LINUX]) AC_SUBST(LINUX) if test x$enable_inkernel = xyes ; then echo ln -s `pwd` $LINUX/fs/lustre @@ -27,201 +57,196 @@ if test x$enable_inkernel = xyes ; then ln -s `pwd` $LINUX/fs/lustre fi -# -------------------- -AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...) 
-if test $host_cpu = "lib" ; then - host_cpu="lib" - AC_MSG_RESULT(no building Lustre library) -else - if test -e $LINUX/include/asm-um ; then - if test X`ls -id $LINUX/include/asm/ | awk '{print $1}'` = X`ls -id $LINUX/include/asm-um | awk '{print $1}'` ; then - host_cpu="um"; - AC_MSG_RESULT(yes) - else - AC_MSG_RESULT(no (asm doesn't point at asm-um)) - fi +# -------- check for .config -------- +AC_ARG_WITH([linux-config], + [AC_HELP_STRING([--with-linux-config=path], + [set path to Linux .config (default=\$LINUX/.config)])], + [LINUX_CONFIG=$with_linux_config], + [LINUX_CONFIG=$LINUX/.config]) +AC_SUBST(LINUX_CONFIG) - else - AC_MSG_RESULT(no (asm-um missing)) - fi -fi +AC_CHECK_FILE([/boot/kernel.h], + [KERNEL_SOURCE_HEADER='/boot/kernel.h'], + [AC_CHECK_FILE([/var/adm/running-kernel.h], + [KERNEL_SOURCE_HEADER='/var/adm/running-kernel.h'])]) -# --------- Linux 25 ------------------ +AC_ARG_WITH([kernel-source-header], + AC_HELP_STRING([--with-kernel-source-header=path], + [Use a different kernel version header. 
Consult README.kernel-source for details.]), + [KERNEL_SOURCE_HEADER=$with_kernel_source_header]) -AC_MSG_CHECKING(if you are running linux 2.5) -if test -e $LINUX/include/linux/namei.h ; then - linux25="yes" - AC_MSG_RESULT(yes) -else - linux25="no" - AC_MSG_RESULT(no) +# -------------------- +ARCH_UM= +UML_CFLAGS= +if test x$enable_modules != xno ; then + AC_MSG_CHECKING([if you are running user mode linux for $host_cpu]) + if test -e $LINUX/include/asm-um ; then + if test X`ls -id $LINUX/include/asm/ | awk '{print $1}'` = X`ls -id $LINUX/include/asm-um | awk '{print $1}'` ; then + ARCH_UM='ARCH=um' + # see notes in Rules.in + UML_CFLAGS='-O0' + AC_MSG_RESULT(yes) + else + AC_MSG_RESULT([no (asm doesn't point at asm-um)]) + fi + else + AC_MSG_RESULT([no (asm-um missing)]) + fi fi +AC_SUBST(ARCH_UM) +AC_SUBST(UML_CFLAGS) +# --------- Linux 25 ------------------ + +AC_CHECK_FILE([$LINUX/include/linux/namei.h], + [ + linux25="yes" + KMODEXT=".ko" + ],[ + KMODEXT=".o" + linux25="no" + ]) +AC_MSG_CHECKING([if you are using Linux 2.6]) +AC_MSG_RESULT([$linux25]) AM_CONDITIONAL(LINUX25, test x$linux25 = xyes) -echo "Makefiles for in linux 2.5 build: $LINUX25" +AC_SUBST(KMODEXT) # ------- Makeflags ------------------ -AC_MSG_CHECKING(setting make flags system architecture: ) -case ${host_cpu} in - lib ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -Wall ' - KCPPFLAGS='-D__arch_lib__ ' - libdir='${exec_prefix}/lib/lustre' - MOD_LINK=elf_i386 -;; - um ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -Wall -pipe -Wno-trigraphs -Wstrict-prototypes -fno-strict-aliasing -fno-common ' - case ${linux25} in - yes ) - KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/kernel/skas/include -O2 -nostdinc -iwithprefix include' - ;; - * ) - KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL 
-D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/include ' - ;; - esac - - MOD_LINK=elf_i386 -;; - i*86 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -pipe' - case ${linux25} in - yes ) - KCPPFLAGS='-D__KERNEL__ -DMODULE -march=i686 -I$(LINUX)/include/asm-i386/mach-default -nostdinc -iwithprefix include ' - ;; - * ) - KCPPFLAGS='-D__KERNEL__ -DMODULE ' - ;; - esac - MOD_LINK=elf_i386 -;; - - alphaev6 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev6' - KCPPFLAGS='-D__KERNEL__ -DMODULE ' - MOD_LINK=elf64alpha -;; - - alphaev67 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev6' - KCPPFLAGS='-D__KERNEL__ -DMODULE ' - MOD_LINK=elf64alpha -;; - - alpha* ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev5' - KCPPFLAGS='-D__KERNEL__ -DMODULE ' - MOD_LINK=elf64alpha -;; - - ia64 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step' - KCPPFLAGS='-D__KERNEL__ -DMODULE' - MOD_LINK=elf64_ia64 -;; - - x86_64 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -fomit-frame-pointer -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -finline-limit=2000 -fno-strength-reduce -fno-asynchronous-unwind-tables' - KCPPFLAGS='-D__KERNEL__ -DMODULE' - MOD_LINK=elf_x86_64 -;; - - sparc64 ) - AC_MSG_RESULT($host_cpu) - 
KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs' - KCPPFLAGS='-D__KERNEL__' - MOD_LINK=elf64_sparc - -;; - - powerpc ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-O2 -g -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -D__powerpc__ -fsigned-char -msoft-float -pipe -ffixed-r2 -Wno-uninitialized -mmultiple -mstring' - KCPPFLAGS='-D__KERNEL__ -DMODULE' - MOD_LINK=elf32ppclinux -;; - - *) - AC_ERROR("Unknown Linux Platform: $host_cpu") -;; -esac - -# ----------- make dep run? ------------------ - -if test $host_cpu != "lib" ; then - AC_MSG_CHECKING(if make dep has been run in kernel source (host $host_cpu) ) - if test -f $LINUX/include/linux/config.h ; then - AC_MSG_RESULT(yes) - else - AC_MSG_ERROR(** cannot find $LINUX/include/linux/config.h. Run make dep in $LINUX.) 
- fi +CPPFLAGS="$CRAY_PORTALS_INCLUDE $CRAY_PORTALS_COMMANDLINE -I\$(top_srcdir)/include -I\$(top_srcdir)/portals/include" + +# liblustre are all the same +LLCPPFLAGS="-D__arch_lib__ -D_LARGEFILE64_SOURCE=1" +AC_SUBST(LLCPPFLAGS) + +LLCFLAGS="-g -Wall -fPIC" +AC_SUBST(LLCFLAGS) + +# everyone builds against portals and lustre + +if test x$enable_ldiskfs = xyes ; then + AC_DEFINE(CONFIG_LDISKFS_FS_MODULE, 1, [build ldiskfs as a module]) + AC_DEFINE(CONFIG_LDISKFS_FS_XATTR, 1, [enable extended attributes for ldiskfs]) + AC_DEFINE(CONFIG_LDISKFS_FS_POSIX_ACL, 1, [enable posix acls]) + AC_DEFINE(CONFIG_LDISKFS_FS_SECURITY, 1, [enable fs security]) fi +EXTRA_KCFLAGS="-g $CRAY_PORTALS_INCLUDE $CRAY_PORTALS_COMMANDLINE -I$PWD/portals/include -I$PWD/include" + +# these are like AC_TRY_COMPILE, but try to build modules against the +# kernel, inside the kernel-tests directory + +AC_DEFUN([LUSTRE_MODULE_CONFTEST], +[cat >conftest.c <<_ACEOF +$1 +_ACEOF +]) + +AC_DEFUN([LUSTRE_MODULE_COMPILE_IFELSE], +[m4_ifvaln([$1], [LUSTRE_MODULE_CONFTEST([$1])])dnl +rm -f kernel-tests/conftest.o kernel-tests/conftest.mod.c kernel-tests/conftest.ko +AS_IF([AC_TRY_COMMAND(cp conftest.c kernel-tests && make [$2] -f $PWD/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$LINUX_CONFIG -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX EXTRA_CFLAGS="$EXTRA_KCFLAGS" $ARCH_UM SUBDIRS=$PWD/kernel-tests) >/dev/null && AC_TRY_COMMAND([$3])], + [$4], + [_AC_MSG_LOG_CONFTEST +m4_ifvaln([$5],[$5])dnl])dnl +rm -f kernel-tests/conftest.o kernel-tests/conftest.mod.c kernel-tests/conftest.mod.o kernel-tests/conftest.ko m4_ifval([$1], [kernel-tests/conftest.c conftest.c])[]dnl +]) + +AC_DEFUN([LUSTRE_MODULE_TRY_COMPILE], +[LUSTRE_MODULE_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[$1]], [[$2]])], + [modules], + [test -s kernel-tests/conftest.o], + [$3], [$4])]) + +AC_DEFUN([LUSTRE_MODULE_TRY_MAKE], +[LUSTRE_MODULE_COMPILE_IFELSE([AC_LANG_PROGRAM([[$1]], [[$2]])], [$3], [$4], [$5], [$6])]) + # ------------ 
include paths ------------------ -KINCFLAGS="$CRAY_PORTALS_INCLUDE $CRAY_PORTALS_COMMANDLINE \ - -I\$(top_srcdir)/include \ - -I\$(top_srcdir)/portals/include" -if test $host_cpu != "lib" ; then - KINCFLAGS="$KINCFLAGS -I$LINUX/include -I$LINUX/include" -fi -CPPFLAGS="$KINCFLAGS $ARCHCPPFLAGS" - -if test $host_cpu != "lib" ; then -# ------------ autoconf.h ------------------ - AC_MSG_CHECKING(if autoconf.h is in kernel source) - if test -f $LINUX/include/linux/autoconf.h ; then - AC_MSG_RESULT(yes) - else - AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.) - fi - -# ------------ LINUXRELEASE and moduledir ------------------ - AC_MSG_CHECKING(for Linux release) - - dnl We need to rid ourselves of the nasty [ ] quotes. - changequote(, ) - dnl Get release from version.h - LINUXRELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`" - changequote([, ]) - - moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel - AC_SUBST(moduledir) - - modulefsdir='$(moduledir)/fs/$(PACKAGE)' - AC_SUBST(modulefsdir) - - AC_MSG_RESULT($LINUXRELEASE) - AC_SUBST(LINUXRELEASE) - -# ------------ RELEASE -------------------------------- - AC_MSG_CHECKING(lustre release) - - dnl We need to rid ourselves of the nasty [ ] quotes. - changequote(, ) - dnl Get release from version.h - RELEASE="`sed -ne 's/-/_/g' -e 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_]*\).*/\1/p' $LINUX/include/linux/version.h`_`date +%Y%m%d%H%M`" - changequote([, ]) - - AC_MSG_RESULT($RELEASE) - AC_SUBST(RELEASE) - -# ---------- modversions? 
-------------------- - AC_MSG_CHECKING(for MODVERSIONS) - if egrep -e 'MODVERSIONS.*1' $LINUX/include/linux/autoconf.h >/dev/null 2>&1; - then - if test $linux25 != "yes"; then - MFLAGS="-DMODULE -DMODVERSIONS -include $LINUX/include/linux/modversions.h -DEXPORT_SYMTAB" - AC_MSG_RESULT(yes) - fi - fi +if test x$enable_modules != xno ; then + # ------------ .config exists ---------------- + AC_CHECK_FILE([$LINUX_CONFIG],[], + [AC_MSG_ERROR([Kernel config could not be found. If you are building from a kernel-source rpm consult README.kernel-source])]) + + # ----------- make dep run? ------------------ + AC_CHECK_FILES([$LINUX/include/linux/autoconf.h + $LINUX/include/linux/version.h + $LINUX/include/linux/config.h],[], + [AC_MSG_ERROR([Run make config in $LINUX.])]) + + # ------------ rhconfig.h includes runtime-generated bits -- + # red hat kernel-source checks + + # we know this exists after the check above. if the user + # tarred up the tree and ran make dep etc. in it, then + # version.h gets overwritten with a standard linux one. + + if grep rhconfig $LINUX/include/linux/version.h >/dev/null ; then + # This is a clean kernel-source tree, we need to + # enable extensive workarounds to get this to build + # modules + AC_CHECK_FILE([$KERNEL_SOURCE_HEADER], + [if test $KERNEL_SOURCE_HEADER = '/boot/kernel.h' ; then + AC_MSG_WARN([Using /boot/kernel.h from RUNNING kernel.]) + AC_MSG_WARN([If this is not what you want, use --with-kernel-source-header.]) + AC_MSG_WARN([Consult README.kernel-source for details.]) + fi], + [AC_MSG_ERROR([$KERNEL_SOURCE_HEADER not found. 
Consult README.kernel-source for details.])]) + EXTRA_KCFLAGS="-include $KERNEL_SOURCE_HEADER $EXTRA_KCFLAGS" + fi + + # --- check that we can build modules at all + AC_MSG_CHECKING([that modules can be built]) + LUSTRE_MODULE_TRY_COMPILE([],[], + [ + AC_MSG_RESULT([yes]) + ],[ + AC_MSG_RESULT([no]) + AC_MSG_WARN([Consult config.log for details.]) + AC_MSG_WARN([If you are trying to build with a kernel-source rpm, consult README.kernel-source]) + AC_MSG_ERROR([Kernel modules could not be built.]) + ]) + + # ------------ LINUXRELEASE and moduledir ------------------ + AC_MSG_CHECKING([for Linux release]) + rm -f kernel-tests/conftest.i + LINUXRELEASE= + if test $linux25 = 'yes' ; then + makerule="$PWD/kernel-tests" + else + makerule="_dir_$PWD/kernel-tests" + fi + LUSTRE_MODULE_TRY_MAKE( + [#include ], + [LINUXRELEASE=UTS_RELEASE], + [$makerule LUSTRE_KERNEL_TEST=conftest.i], + [test -s kernel-tests/conftest.i], + [ + # LINUXRELEASE="UTS_RELEASE" + eval $(grep LINUXRELEASE kernel-tests/conftest.i) + ],[ + AC_MSG_RESULT([unknown]) + AC_MSG_ERROR([Could not preprocess test program. 
Consult config.log for details.]) + ]) + rm -f kernel-tests/conftest.i + if test x$LINUXRELEASE = x ; then + AC_MSG_RESULT([unknown]) + AC_MSG_ERROR([Could not determine Linux release version from linux/version.h.]) + fi + AC_MSG_RESULT([$LINUXRELEASE]) + AC_SUBST(LINUXRELEASE) + + moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel + AC_SUBST(moduledir) + + modulefsdir='$(moduledir)/fs/$(PACKAGE)' + AC_SUBST(modulefsdir) + + # ------------ RELEASE -------------------------------- + AC_MSG_CHECKING([for Lustre release]) + RELEASE="`echo ${LINUXRELEASE} | tr '-' '_'`_`date +%Y%m%d%H%M`" + AC_MSG_RESULT($RELEASE) + AC_SUBST(RELEASE) fi # ---------- Portals flags -------------------- @@ -234,166 +259,299 @@ fi #fi #AC_SUBST(usrprefix) -AC_MSG_CHECKING(if kernel has CPU affinity support) -SET_CPUS_ALLOW="`grep -c set_cpus_allowed $LINUX/kernel/softirq.c`" -if test "$SET_CPUS_ALLOW" != 0 ; then - enable_affinity_temp="-DCPU_AFFINITY=1" - AC_MSG_RESULT(yes) +AC_MSG_CHECKING([for zero-copy TCP support]) +AC_ARG_ENABLE([zerocopy], + AC_HELP_STRING([--disable-zerocopy], + [disable socknal zerocopy]), + [],[enable_zerocopy='yes']) +if test x$enable_zerocopy = xno ; then + AC_MSG_RESULT([no (by request)]) else - enable_affinity_temp="" - AC_MSG_RESULT(no) + ZCCD="`grep -c zccd $LINUX/include/linux/skbuff.h`" + if test "$ZCCD" != 0 ; then + AC_DEFINE(SOCKNAL_ZC, 1, [use zero-copy TCP]) + AC_MSG_RESULT(yes) + else + AC_MSG_RESULT([no (no kernel support)]) + fi fi -AC_MSG_CHECKING(if kernel has zero-copy TCP support) -ZCCD="`grep -c zccd $LINUX/include/linux/skbuff.h`" -if test "$ZCCD" != 0 ; then - enable_zerocopy_temp="-DSOCKNAL_ZC=1" - AC_MSG_RESULT(yes) +AC_MSG_CHECKING([for CPU affinity support]) +AC_ARG_ENABLE([affinity], + AC_HELP_STRING([--disable-affinity], + [disable process/irq affinity]), + [],[enable_affinity='yes']) +if test x$enable_affinity = xno ; then + AC_MSG_RESULT([no (by request)]) else - enable_zerocopy_temp="" - AC_MSG_RESULT(no) + SET_CPUS_ALLOW="`grep -c 
set_cpus_allowed $LINUX/kernel/softirq.c`" + if test "$SET_CPUS_ALLOW" != 0 ; then + AC_DEFINE(CPU_AFFINITY, 1, [kernel has cpu affinity support]) + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no (no kernel support)]) + fi fi -AC_ARG_ENABLE(zerocopy, [ --disable-zerocopy disable socknal zerocopy],enable_zerocopy="", enable_zerocopy=$enable_zerocopy_temp) -AC_ARG_ENABLE(affinity, [ --disable-affinity disable process/irq affinity],enable_affinity="", enable_affinity=$enable_affinity_temp) ##################################### -AC_MSG_CHECKING(if quadrics kernel headers are present) +AC_MSG_CHECKING([if quadrics kernel headers are present]) if test -d $LINUX/drivers/net/qsnet ; then - AC_MSG_RESULT(yes) - QSWNAL="qswnal" - AC_MSG_CHECKING(for multirail EKC) - if test -f $LINUX/include/elan/epcomms.h; then - AC_MSG_RESULT(supported) - with_quadrics="-DMULTIRAIL_EKC=1" - else - AC_MSG_RESULT(not supported) - with_quadrics="-I$LINUX/drivers/net/qsnet/include" - fi - : + AC_MSG_RESULT([yes]) + QSWNAL="qswnal" + AC_MSG_CHECKING([for multirail EKC]) + if test -f $LINUX/include/elan/epcomms.h; then + AC_MSG_RESULT([supported]) + QSWCPPFLAGS="-DMULTIRAIL_EKC=1" + else + AC_MSG_RESULT([not supported]) + if test -d $LINUX/drivers/net/qsnet/include; then + QSWCPPFLAGS="-I$LINUX/drivers/net/qsnet/include" + else + QSWCPPFLAGS="-I$LINUX/include/linux" + fi + fi else - AC_MSG_RESULT(no) - QSWNAL="" - with_quadrics="" - : + AC_MSG_RESULT([no]) + QSWNAL="" + QSWCPPFLAGS="" fi -AC_SUBST(with_quadrics) +AC_SUBST(QSWCPPFLAGS) AC_SUBST(QSWNAL) - -# R. 
Read 5/02 -GMNAL="" -echo "checking with-gm=" ${with_gm} -if test "${with_gm+set}" = set; then - if test "${with_gm}" = yes; then - with_gm="-I/usr/local/gm/include" - else - with_gm="-I$with_gm/include -I$with_gm/drivers -I$with_gm/drivers/linux/gm" - fi - GMNAL="gmnal" -else -# default case - no GM - with_gm="" -fi -AC_SUBST(with_gm) +AM_CONDITIONAL(BUILD_QSWNAL, test x$QSWNAL = "xqswnal") + +AC_MSG_CHECKING([if gm support was requested]) +AC_ARG_WITH([gm], + AC_HELP_STRING([--with-gm=path], + [build gmnal against path]), + [ + case $with_gm in + yes) + AC_MSG_RESULT([yes]) + GMCPPFLAGS="-I/usr/local/gm/include" + GMNAL="gmnal" + ;; + no) + AC_MSG_RESULT([no]) + GMCPPFLAGS="" + GMNAL="" + ;; + *) + AC_MSG_RESULT([yes]) + GMCPPFLAGS="-I$with_gm/include -I$with_gm/drivers -I$with_gm/drivers/linux/gm" + GMNAL="gmnal" + ;; + esac + ],[ + AC_MSG_RESULT([no]) + GMCPPFLAGS="" + GMNAL="" + ]) +AC_SUBST(GMCPPFLAGS) AC_SUBST(GMNAL) - +AM_CONDITIONAL(BUILD_GMNAL, test x$GMNAL = "xgmnal") #fixme: where are the default IB includes? 
default_ib_include_dir=/usr/local/ib/include an_ib_include_file=vapi.h -AC_ARG_WITH(ib, [ --with-ib=[yes/no/path] Path to IB includes], with_ib=$withval, with_ib=$default_ib) -AC_MSG_CHECKING(if IB headers are present) -if test "$with_ib" = yes; then - with_ib=$default_ib_include_dir -fi -if test "$with_ib" != no -a -f ${with_ib}/${an_ib_include_file}; then - AC_MSG_RESULT(yes) - IBNAL="ibnal" - with_ib="-I${with_ib}" -else - AC_MSG_RESULT(no) - IBNAL="" - with_ib="" -fi +AC_MSG_CHECKING([if ib nal support was requested]) +AC_ARG_WITH([ib], + AC_HELP_STRING([--with-ib=yes/no/path], + [Path to IB includes]), + [ + case $with_ib in + yes) + AC_MSG_RESULT([yes]) + IBCPPFLAGS="-I/usr/local/ib/include" + IBNAL="ibnal" + ;; + no) + AC_MSG_RESULT([no]) + IBCPPFLAGS="" + IBNAL="" + ;; + *) + AC_MSG_RESULT([yes]) + IBCPPFLAGS="-I$with_ib" + IBNAL="ibnal" + ;; + esac + ],[ + AC_MSG_RESULT([no]) + IBCPPFLAGS="" + IBNAL="" + ]) AC_SUBST(IBNAL) -AC_SUBST(with_ib) - +AC_SUBST(IBCPPFLAGS) +AM_CONDITIONAL(BUILD_IBNAL, test x$IBNAL = "xibnal") -def_scamac=/opt/scali/include -AC_ARG_WITH(scamac, [ --with-scamac=[yes/no/path] Path to ScaMAC includes (default=/opt/scali/include)], with_scamac=$withval, with_scamac=$def_scamac) -AC_MSG_CHECKING(if ScaMAC headers are present) -if test "$with_scamac" = yes; then - with_scamac=$def_scamac -fi -if test "$with_scamac" != no -a -f ${with_scamac}/scamac.h; then - AC_MSG_RESULT(yes) - SCIMACNAL="scimacnal" - with_scamac="-I${with_scamac} -I${with_scamac}/icm" -else - AC_MSG_RESULT(no) - SCIMACNAL="" - with_scamac="" -fi - -AC_SUBST(with_scamac) -AC_SUBST(SCIMACNAL) - -CFLAGS="$KCFLAGS" -CPPFLAGS="$KINCFLAGS $KCPPFLAGS $MFLAGS $enable_zerocopy $enable_affinity $with_quadrics $with_gm $with_scamac $with_ib" - -AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib) AC_SUBST(MOD_LINK) AC_SUBST(LINUX25) -AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib) # ---------- Red Hat 2.4.18 has iobuf->dovary -------------- # But other kernels don't AC_MSG_CHECKING([if 
struct kiobuf has a dovary field]) -AC_TRY_COMPILE([#define __KERNEL__ - #include ], - [struct kiobuf iobuf; - iobuf.dovary = 1;], - [AC_MSG_RESULT([yes]) - CPPFLAGS="$CPPFLAGS -DHAVE_KIOBUF_DOVARY"], - [AC_MSG_RESULT([no])]) +LUSTRE_MODULE_TRY_COMPILE( + [ + #include + ],[ + struct kiobuf iobuf; + iobuf.dovary = 1; + ],[ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_KIOBUF_DOVARY, 1, [struct kiobuf has a dovary field]) + ],[ + AC_MSG_RESULT([no]) + ]) + +# ----------- 2.6.4 no longer has page->list --------------- +AC_MSG_CHECKING([if struct page has a list field]) +LUSTRE_MODULE_TRY_COMPILE( + [ + #include + ],[ + struct page page; + &page.list; + ],[ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_PAGE_LIST, 1, [struct page has a list field]) + ],[ + AC_MSG_RESULT([no]) + ]) # ---------- Red Hat 2.4.20 backports some 2.5 bits -------- # This needs to run after we've defined the KCPPFLAGS -AC_MSG_CHECKING(for kernel version) -AC_TRY_COMPILE([#define __KERNEL__ - #include ], - [struct task_struct p; - p.sighand = NULL;], - [RH_2_4_20=1], - [RH_2_4_20=0]) - -if test $RH_2_4_20 = 1; then - AC_MSG_RESULT(redhat-2.4.20) - CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20" -else - AC_MSG_RESULT($LINUXRELEASE) -fi +AC_MSG_CHECKING([for kernel version]) +LUSTRE_MODULE_TRY_COMPILE( + [ + #include + ],[ + struct task_struct p; + p.sighand = NULL; + ],[ + AC_DEFINE(CONFIG_RH_2_4_20, 1, [this kernel contains Red Hat 2.4.20 patches]) + AC_MSG_RESULT([redhat-2.4.20]) + ],[ + AC_MSG_RESULT([$LINUXRELEASE]) + ]) # ---------- Red Hat 2.4.21 backports some more 2.5 bits -------- -AC_MSG_CHECKING(if kernel defines PDE) +AC_MSG_CHECKING([if kernel defines PDE]) HAVE_PDE="`grep -c 'proc_dir_entry..PDE' $LINUX/include/linux/proc_fs.h`" if test "$HAVE_PDE" != 0 ; then - CPPFLAGS="$CPPFLAGS -DHAVE_PDE" - AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_PDE, 1, [the kernel defines PDE]) + AC_MSG_RESULT([yes]) else - AC_MSG_RESULT(no) + AC_MSG_RESULT([no]) fi -AC_MSG_CHECKING(if kernel passes struct file to direct_IO) 
+AC_MSG_CHECKING([if kernel passes struct file to direct_IO]) HAVE_DIO_FILE="`grep -c 'direct_IO.*struct file' $LINUX/include/linux/fs.h`" if test "$HAVE_DIO_FILE" != 0 ; then - CPPFLAGS="$CPPFLAGS -DHAVE_DIO_FILE" - AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DIO_FILE, 1, [the kernel passes struct file to direct_IO]) + AC_MSG_RESULT(yes) else - AC_MSG_RESULT(no) + AC_MSG_RESULT(no) +fi + +if test x$enable_modules != xno ; then + # ---------- modules? ------------------------ + AC_MSG_CHECKING([for module support]) + LUSTRE_MODULE_TRY_COMPILE( + [ + #include + ],[ + #ifndef CONFIG_MODULES + #error CONFIG_MODULES not #defined + #endif + ],[ + AC_MSG_RESULT([yes]) + ],[ + AC_MSG_RESULT([no]) + AC_MSG_ERROR([module support is required to build Lustre kernel modules.]) + ]) + + # ---------- modversions? -------------------- + AC_MSG_CHECKING([for MODVERSIONS]) + LUSTRE_MODULE_TRY_COMPILE( + [ + #include + ],[ + #ifndef CONFIG_MODVERSIONS + #error CONFIG_MODVERSIONS not #defined + #endif + ],[ + AC_MSG_RESULT([yes]) + ],[ + AC_MSG_RESULT([no]) + ]) + + # ------------ preempt ----------------------- + AC_MSG_CHECKING([if preempt is enabled]) + LUSTRE_MODULE_TRY_COMPILE( + [ + #include + ],[ + #ifndef CONFIG_PREEMPT + #error CONFIG_PREEMPT is not #defined + #endif + ],[ + AC_MSG_RESULT([yes]) + AC_MSG_ERROR([Lustre does not support kernels with preempt enabled.]) + ],[ + AC_MSG_RESULT([no]) + ]) + + if test $BACKINGFS = 'ext3' ; then + # --- Check that ext3 and ext3 xattr are enabled in the kernel + AC_MSG_CHECKING([that ext3 is enabled in the kernel]) + LUSTRE_MODULE_TRY_COMPILE( + [ + #include + ],[ + #ifndef CONFIG_EXT3_FS + #ifndef CONFIG_EXT3_FS_MODULE + #error CONFIG_EXT3_FS not #defined + #endif + #endif + ],[ + AC_MSG_RESULT([yes]) + ],[ + AC_MSG_RESULT([no]) + AC_MSG_ERROR([Lustre requires that ext3 is enabled in the kernel (CONFIG_EXT3_FS)]) + ]) + + AC_MSG_CHECKING([that extended attributes for ext3 are enabled in the kernel]) + LUSTRE_MODULE_TRY_COMPILE( + [ + 
#include + ],[ + #ifndef CONFIG_EXT3_FS_XATTR + #error CONFIG_EXT3_FS_XATTR not #defined + #endif + ],[ + AC_MSG_RESULT([yes]) + ],[ + AC_MSG_RESULT([no]) + AC_MSG_WARN([Lustre requires that extended attributes for ext3 are enabled in the kernel (CONFIG_EXT3_FS_XATTR.)]) + AC_MSG_WARN([This build may fail.]) + ]) + fi # BACKINGFS = ext3 fi + +CPPFLAGS="-include \$(top_builddir)/include/config.h $CPPFLAGS" +EXTRA_KCFLAGS="-include $PWD/include/config.h $EXTRA_KCFLAGS" +AC_SUBST(EXTRA_KCFLAGS) + +#echo "KCPPFLAGS: $KCPPFLAGS" +#echo "KCFLAGS: $KCFLAGS" +#echo "LLCPPFLAGS: $LLCPPFLAGS" +#echo "LLCFLAGS: $LLCFLAGS" +#echo "MOD_LINK: $MOD_LINK" +#echo "CFLAGS: $CFLAGS" +#echo "CPPFLAGS: $CPPFLAGS" diff --git a/lnet/klnds/Makefile.am b/lnet/autoMakefile.am similarity index 59% rename from lnet/klnds/Makefile.am rename to lnet/autoMakefile.am index df6ee5c..bd57e6e 100644 --- a/lnet/klnds/Makefile.am +++ b/lnet/autoMakefile.am @@ -3,5 +3,6 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution -DIST_SUBDIRS= socknal qswnal gmnal scimacnal ibnal -SUBDIRS= socknal @QSWNAL@ @GMNAL@ @SCIMACNAL@ @IBNAL@ +EXTRA_DIST = archdep.m4 build.m4 include + +SUBDIRS = portals libcfs knals unals router tests doc utils diff --git a/lnet/build.m4 b/lnet/build.m4 index 8c55b20..93a370f 100644 --- a/lnet/build.m4 +++ b/lnet/build.m4 @@ -1,5 +1,9 @@ # ---------- other tests and settings --------- +AC_CHECK_TYPE([spinlock_t], + [AC_DEFINE(HAVE_SPINLOCK_T, 1, [spinlock_t is defined])], + [], + [#include ]) # --------- unsigned long long sane? ------- @@ -28,6 +32,9 @@ pkgexampledir='${prefix}/usr/lib/$(PACKAGE)/examples' AC_SUBST(pkgexampledir) pymoddir='${prefix}/usr/lib/${PACKAGE}/python/Lustre' AC_SUBST(pymoddir) +# for substitution in lconf +PYMOD_DIR="/usr/lib/$PACKAGE/python" +AC_SUBST(PYMOD_DIR) modulenetdir='$(moduledir)/net/$(PACKAGE)' AC_SUBST(modulenetdir) @@ -35,14 +42,15 @@ AC_SUBST(modulenetdir) # ---------- BAD gcc? 
------------ AC_PROG_RANLIB AC_PROG_CC -AC_MSG_CHECKING(for buggy compiler) +AC_MSG_CHECKING([for buggy compiler]) CC_VERSION=`$CC -v 2>&1 | grep "^gcc version"` bad_cc() { + AC_MSG_RESULT([buggy compiler found!]) echo echo " '$CC_VERSION'" echo " has been known to generate bad code, " echo " please get an updated compiler." - AC_MSG_ERROR(sorry) + AC_MSG_ERROR([sorry]) } TMP_VERSION=`echo $CC_VERSION | cut -c 1-16` if test "$TMP_VERSION" = "gcc version 2.95"; then @@ -61,7 +69,7 @@ case "$CC_VERSION" in bad_cc ;; *) - AC_MSG_RESULT(no known problems) + AC_MSG_RESULT([no known problems]) ;; esac # end ------ BAD gcc? ------------ @@ -70,29 +78,30 @@ esac # this doesn't seem to work on older autoconf # AC_CHECK_LIB(readline, readline,,) -AC_ARG_ENABLE(readline, [ --enable-readline use readline library],, - enable_readline="yes") - -if test "$enable_readline" = "yes" ; then - LIBREADLINE="-lreadline -lncurses" - HAVE_LIBREADLINE="-DHAVE_LIBREADLINE=1" +AC_MSG_CHECKING([for readline support]) +AC_ARG_ENABLE(readline, + AC_HELP_STRING([--disable-readline], + [do not use readline library]), + [],[enable_readline='yes']) +AC_MSG_RESULT([$enable_readline]) +if test x$enable_readline = xyes ; then + LIBREADLINE="-lreadline -lncurses" + AC_DEFINE(HAVE_LIBREADLINE, 1, [readline library is available]) else - LIBREADLINE="" - HAVE_LIBREADLINE="" + LIBREADLINE="" fi AC_SUBST(LIBREADLINE) -AC_SUBST(HAVE_LIBREADLINE) -AC_ARG_ENABLE(efence, [ --enable-efence use efence library],, - enable_efence="no") - +AC_MSG_CHECKING([if efence debugging support is requested]) +AC_ARG_ENABLE(efence, + AC_HELP_STRING([--enable-efence], + [use efence library]), + [],[enable_efence='no']) +AC_MSG_RESULT([$enable_efence]) if test "$enable_efence" = "yes" ; then - LIBEFENCE="-lefence" - HAVE_LIBEFENCE="-DHAVE_LIBEFENCE=1" + LIBEFENCE="-lefence" + AC_DEFINE(HAVE_LIBEFENCE, 1, [libefence support is requested]) else - LIBEFENCE="" - HAVE_LIBEFENCE="" + LIBEFENCE="" fi AC_SUBST(LIBEFENCE) 
-AC_SUBST(HAVE_LIBEFENCE) - diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 9e7e7c2..c55dd37 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -8,7 +8,7 @@ #define PORTAL_DEBUG #ifndef offsetof -# define offsetof(typ,memb) ((int)((char *)&(((typ *)0)->memb))) +# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb))) #endif #define LOWEST_BIT_SET(x) ((x) & ~((x) - 1)) @@ -301,6 +301,7 @@ extern void kportal_blockallsigs (void); #endif # include # include +# include # include # ifndef DEBUG_SUBSYSTEM # define DEBUG_SUBSYSTEM S_UNDEFINED @@ -309,7 +310,12 @@ extern void kportal_blockallsigs (void); # undef NDEBUG # include # define LASSERT(e) assert(e) -# define LASSERTF(cond, args...) assert(cond) +# define LASSERTF(cond, args...) \ +do { \ + if (!(cond)) \ + CERROR(args); \ + assert(cond); \ +} while (0) # else # define LASSERT(e) # define LASSERTF(cond, args...) do { } while (0) @@ -317,6 +323,7 @@ extern void kportal_blockallsigs (void); # define printk(format, args...) printf (format, ## args) # define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0); # define PORTAL_FREE(a, b) do { free(a); } while (0); +void portals_debug_dumplog(void); # define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \ printf("%02x:%06x (@%lu %s:%s,l. 
%d %d %lu): " format, \ (subsys), (mask), (long)time(0), file, fn, line, \ @@ -433,40 +440,6 @@ struct portals_device_userstate * USER LEVEL STUFF BELOW */ -#define PORTALS_CFG_VERSION 0x00010001; - -struct portals_cfg { - __u32 pcfg_version; - __u32 pcfg_command; - - __u32 pcfg_nal; - __u32 pcfg_flags; - - __u32 pcfg_gw_nal; - __u64 pcfg_nid; - __u64 pcfg_nid2; - __u64 pcfg_nid3; - __u32 pcfg_id; - __u32 pcfg_misc; - __u32 pcfg_fd; - __u32 pcfg_count; - __u32 pcfg_size; - __u32 pcfg_wait; - - __u32 pcfg_plen1; /* buffers in userspace */ - char *pcfg_pbuf1; - __u32 pcfg_plen2; /* buffers in userspace */ - char *pcfg_pbuf2; -}; - -#define PCFG_INIT(pcfg, cmd) \ -do { \ - memset(&pcfg, 0, sizeof(pcfg)); \ - pcfg.pcfg_version = PORTALS_CFG_VERSION; \ - pcfg.pcfg_command = (cmd); \ - \ -} while (0) - #define PORTAL_IOCTL_VERSION 0x00010007 #define PING_SYNC 0 #define PING_ASYNC 1 @@ -672,21 +645,13 @@ enum { GMNAL = 3, /* 4 unused */ TCPNAL = 5, - SCIMACNAL = 6, - ROUTER = 7, - IBNAL = 8, + ROUTER = 6, + IBNAL = 7, + CRAY_KB_ERNAL = 8, NAL_ENUM_END_MARKER }; -#ifdef __KERNEL__ -extern ptl_handle_ni_t kqswnal_ni; -extern ptl_handle_ni_t ksocknal_ni; -extern ptl_handle_ni_t kgmnal_ni; -extern ptl_handle_ni_t kibnal_ni; -extern ptl_handle_ni_t kscimacnal_ni; -#endif - -#define PTL_NALFMT_SIZE 16 +#define PTL_NALFMT_SIZE 26 /* %u:%u.%u.%u.%u (10+4+4+4+3+1) */ #define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1) @@ -711,10 +676,6 @@ enum { DEBUG_DAEMON_CONTINUE = 4, }; -/* module.c */ -typedef int (*nal_cmd_handler_t)(struct portals_cfg *, void * private); -int kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private); -int kportal_nal_unregister(int nal); enum cfg_record_type { PORTALS_CFG_TYPE = 1, @@ -722,10 +683,6 @@ enum cfg_record_type { }; typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); -int kportal_nal_cmd(struct portals_cfg *); - -ptl_handle_ni_t *kportal_get_ni (int nal); -void kportal_put_ni (int nal); #ifdef __CYGWIN__ # ifndef 
BITS_PER_LONG @@ -737,6 +694,16 @@ void kportal_put_ni (int nal); # endif #endif +#if BITS_PER_LONG > 32 +# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) +#else +# define LI_POISON ((int)0x5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a) +#endif + #if defined(__x86_64__) # define LPU64 "%Lu" # define LPD64 "%Ld" diff --git a/lnet/include/linux/kpr.h b/lnet/include/linux/kpr.h index 45b58fe..51d2d2f 100644 --- a/lnet/include/linux/kpr.h +++ b/lnet/include/linux/kpr.h @@ -81,21 +81,6 @@ typedef struct { void *kpr_arg; } kpr_router_t; -/* Router's control interface (Kernel Portals Routing Control Interface) */ -typedef const struct { - int (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); - int (*kprci_del_route)(int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); - int (*kprci_get_route)(int index, int *gateway_nal, - ptl_nid_t *gateway, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, - int *alive); - int (*kprci_notify)(int gateway_nal, ptl_nid_t gateway_nid, - int alive, time_t when); -} kpr_control_interface_t; - -extern kpr_control_interface_t kpr_control_interface; extern kpr_router_interface_t kpr_router_interface; static inline int diff --git a/lnet/include/linux/libcfs.h b/lnet/include/linux/libcfs.h index 3ebf935..c2a15f4 100644 --- a/lnet/include/linux/libcfs.h +++ b/lnet/include/linux/libcfs.h @@ -6,8 +6,43 @@ #define PORTAL_DEBUG +/* I think this beast is just trying to get cycles_t and get_cycles(). + * this should be in its own header. 
*/ +#ifdef __linux__ +# include +# if defined(__powerpc__) && !defined(__KERNEL__) +# define __KERNEL__ +# include +# undef __KERNEL__ +# else +# if defined(__KERNEL__) +# include +# else +# include +# define cycles_t unsigned long +static inline cycles_t get_cycles(void) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return ((cycles_t)tv.tv_sec * 1000000) + tv.tv_usec; /* microseconds; unsigned math so wrap-around is well defined */ +} +# endif +# endif +#else +# include +typedef u_int32_t __u32; +typedef u_int64_t __u64; +#endif + +#ifdef __KERNEL__ +# include +#else +# include +# define do_gettimeofday(tv) gettimeofday(tv, NULL) +#endif + #ifndef offsetof -# define offsetof(typ,memb) ((int)((char *)&(((typ *)0)->memb))) +# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb))) #endif #define LOWEST_BIT_SET(x) ((x) & ~((x) - 1)) @@ -45,7 +80,8 @@ extern unsigned int portal_cerror; #define S_COBD 0x00200000 #define S_IBNAL 0x00400000 #define S_LMV 0x00800000 - +#define S_SM 0x01000000 +#define S_CMOBD 0x02000000 /* If you change these values, please keep portals/utils/debug.c * up to date! 
*/ @@ -165,6 +201,45 @@ do { \ #define EXIT do { } while (0) #endif +#define PORTALS_CFG_VERSION 0x00010001 /* no trailing ';' so it can be used inside expressions */ + +struct portals_cfg { + __u32 pcfg_version; + __u32 pcfg_command; + + __u32 pcfg_nal; + __u32 pcfg_flags; + + __u32 pcfg_gw_nal; + __u64 pcfg_nid; + __u64 pcfg_nid2; + __u64 pcfg_nid3; + __u32 pcfg_id; + __u32 pcfg_misc; + __u32 pcfg_fd; + __u32 pcfg_count; + __u32 pcfg_size; + __u32 pcfg_wait; + + __u32 pcfg_plen1; /* buffers in userspace */ + char *pcfg_pbuf1; + __u32 pcfg_plen2; /* buffers in userspace */ + char *pcfg_pbuf2; +}; + +#define PCFG_INIT(pcfg, cmd) \ +do { \ + memset(&(pcfg), 0, sizeof(pcfg)); \ + (pcfg).pcfg_version = PORTALS_CFG_VERSION; \ + (pcfg).pcfg_command = (cmd); \ + \ +} while (0) + +typedef int (nal_cmd_handler_fn)(struct portals_cfg *, void *); +int libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *arg); +int libcfs_nal_cmd(struct portals_cfg *pcfg); +void libcfs_nal_cmd_unregister(int nal); + struct portal_ioctl_data { __u32 ioc_len; __u32 ioc_version; @@ -197,6 +272,7 @@ struct portal_ioctl_data { char ioc_bulk[0]; }; + #ifdef __KERNEL__ #include diff --git a/lnet/include/linux/portals_compat25.h b/lnet/include/linux/portals_compat25.h index 43618589..9ab4020 100644 --- a/lnet/include/linux/portals_compat25.h +++ b/lnet/include/linux/portals_compat25.h @@ -40,6 +40,9 @@ # define RECALC_SIGPENDING recalc_sigpending() # define CURRENT_SECONDS CURRENT_TIME +# define kernel_text_address(addr) is_kernel_text_address(addr) +extern int is_kernel_text_address(unsigned long addr); + #else /* 2.4.x */ # define SIGNAL_MASK_LOCK(task, flags) \ @@ -51,17 +54,30 @@ # define RECALC_SIGPENDING recalc_sigpending(current) # define CURRENT_SECONDS CURRENT_TIME +# define kernel_text_address(addr) is_kernel_text_address(addr) +extern int is_kernel_text_address(unsigned long addr); + #endif #if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) -# define THREAD_NAME(comm, fmt, a...)
\ - sprintf(comm, fmt "|%d", ## a, current->thread.extern_pid) +# define THREAD_NAME(comm, len, fmt, a...) \ + snprintf(comm, len, fmt "|%d", ## a, current->thread.extern_pid) #elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -# define THREAD_NAME(comm, fmt, a...) \ - sprintf(comm, fmt "|%d", ## a, current->thread.mode.tt.extern_pid) +# define THREAD_NAME(comm, len, fmt, a...) \ + snprintf(comm, len,fmt"|%d", ## a,current->thread.mode.tt.extern_pid) +#else +# define THREAD_NAME(comm, len, fmt, a...) \ + snprintf(comm, len, fmt, ## a) +#endif + +#ifdef HAVE_PAGE_LIST +/* 2.4 alloc_page users can use page->list */ +#define PAGE_LIST_ENTRY list +#define PAGE_LIST(page) ((page)->list) #else -# define THREAD_NAME(comm, fmt, a...) \ - sprintf(comm, fmt, ## a) +/* 2.6 alloc_page users can use page->lru */ +#define PAGE_LIST_ENTRY lru +#define PAGE_LIST(page) ((page)->lru) #endif #endif /* _PORTALS_COMPAT_H */ diff --git a/lnet/include/lnet/Makefile.am b/lnet/include/lnet/Makefile.am deleted file mode 100644 index c61b084..0000000 --- a/lnet/include/lnet/Makefile.am +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -SUBDIRS = base -include $(top_srcdir)/Rules - -pkginclude_HEADERS=api-support.h api.h arg-blocks.h defines.h errno.h internal.h lib-dispatch.h lib-nal.h lib-p30.h lib-types.h myrnal.h nal.h p30.h ppid.h ptlctl.h stringtab.h types.h nalids.h list.h bridge.h ipmap.h procbridge.h lltrace.h - diff --git a/lnet/include/lnet/api-support.h b/lnet/include/lnet/api-support.h index db83ae7..cfae78c 100644 --- a/lnet/include/lnet/api-support.h +++ b/lnet/include/lnet/api-support.h @@ -1,5 +1,3 @@ -# define DEBUG_SUBSYSTEM S_PORTALS -# define PORTAL_DEBUG #include "build_check.h" diff --git a/lnet/include/lnet/api.h b/lnet/include/lnet/api.h index 69fa339..6d382bb 100644 --- a/lnet/include/lnet/api.h +++ b/lnet/include/lnet/api.h @@ -9,9 +9,9 @@ int PtlInit(int *); void PtlFini(void); -int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size_in, - ptl_ac_index_t acl_size_in, ptl_pid_t requested_pid, - ptl_handle_ni_t * interface_out); +int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid, + ptl_ni_limits_t *desired_limits, ptl_ni_limits_t *actual_limits, + ptl_handle_ni_t *interface_out); int PtlNIInitialized(ptl_interface_t); @@ -37,17 +37,6 @@ int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out); #endif -/* - * PtlNIDebug: - * - * This is not an official Portals 3 API call. It is provided - * by the reference implementation to allow the maintainers an - * easy way to turn on and off debugging information in the - * library. Do not use it in code that is not intended for use - * with any version other than the portable reference library. 
- */ -unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in); - /* * PtlNIFailNid * @@ -123,8 +112,8 @@ int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout, /* These should be called by users */ int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in, - int (*callback) (ptl_event_t * event), - ptl_handle_eq_t * handle_out); + ptl_eq_handler_t handler, + ptl_handle_eq_t *handle_out); int PtlEQFree(ptl_handle_eq_t eventq_in); int PtlEQCount(ptl_handle_eq_t eventq_in, ptl_size_t * count_out); diff --git a/lnet/include/lnet/arg-blocks.h b/lnet/include/lnet/arg-blocks.h index 0be8a3d..21e30d5 100644 --- a/lnet/include/lnet/arg-blocks.h +++ b/lnet/include/lnet/arg-blocks.h @@ -18,7 +18,7 @@ #define PTL_GETID 1 #define PTL_NISTATUS 2 #define PTL_NIDIST 3 -#define PTL_NIDEBUG 4 +// #define PTL_NIDEBUG 4 #define PTL_MEATTACH 5 #define PTL_MEINSERT 6 // #define PTL_MEPREPEND 7 @@ -205,7 +205,7 @@ typedef struct PtlEQAlloc_in { ptl_size_t count_in; void *base_in; int len_in; - int (*callback_in) (ptl_event_t * event); + ptl_eq_handler_t callback_in; } PtlEQAlloc_in; typedef struct PtlEQAlloc_out { diff --git a/lnet/include/lnet/errno.h b/lnet/include/lnet/errno.h index 499f32b..a98bfd9 100644 --- a/lnet/include/lnet/errno.h +++ b/lnet/include/lnet/errno.h @@ -37,12 +37,11 @@ typedef enum { PTL_MD_NO_UPDATE = 18, PTL_FAIL = 19, - PTL_IOV_TOO_MANY = 20, - PTL_IOV_TOO_SMALL = 21, + PTL_IOV_INVALID = 20, - PTL_EQ_IN_USE = 22, + PTL_EQ_IN_USE = 21, - PTL_MAX_ERRNO = 23 + PTL_MAX_ERRNO = 22 } ptl_err_t; /* If you change these, you must update the string table in api-errno.c */ diff --git a/lnet/include/lnet/internal.h b/lnet/include/lnet/internal.h index 94f4f48..25778e4 100644 --- a/lnet/include/lnet/internal.h +++ b/lnet/include/lnet/internal.h @@ -11,25 +11,10 @@ #include -extern int ptl_init; /* Has the library be initialized */ +extern int ptl_init; /* Has the library been initialized */ extern int ptl_ni_init(void); -extern int ptl_me_init(void); 
-extern int ptl_md_init(void); -extern int ptl_eq_init(void); - -extern int ptl_me_ni_init(nal_t * nal); -extern int ptl_md_ni_init(nal_t * nal); -extern int ptl_eq_ni_init(nal_t * nal); - extern void ptl_ni_fini(void); -extern void ptl_me_fini(void); -extern void ptl_md_fini(void); -extern void ptl_eq_fini(void); - -extern void ptl_me_ni_fini(nal_t * nal); -extern void ptl_md_ni_fini(nal_t * nal); -extern void ptl_eq_ni_fini(nal_t * nal); static inline ptl_eq_t * ptl_handle2usereq (ptl_handle_eq_t *handle) diff --git a/lnet/include/lnet/lib-dispatch.h b/lnet/include/lnet/lib-dispatch.h index 90ed4f5..610c776 100644 --- a/lnet/include/lnet/lib-dispatch.h +++ b/lnet/include/lnet/lib-dispatch.h @@ -18,7 +18,6 @@ extern int do_PtlGetId(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlNIStatus(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlNIDist(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlNIDebug(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlMEAttach(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlMEInsert(nal_cb_t * nal, void *private, void *args, void *ret); extern int do_PtlMEPrepend(nal_cb_t * nal, void *private, void *args, diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 350447e..efa929c 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -194,11 +194,11 @@ lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd) int niov; if ((umd->options & PTL_MD_KIOV) != 0) { - niov = umd->niov; + niov = umd->length; size = offsetof(lib_md_t, md_iov.kiov[niov]); } else { niov = ((umd->options & PTL_MD_IOVEC) != 0) ? 
- umd->niov : 1; + umd->length : 1; size = offsetof(lib_md_t, md_iov.iov[niov]); } @@ -346,8 +346,9 @@ ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal) return (lh_entry (lh, lib_me_t, me_lh)); } -extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size); +extern int lib_init(nal_cb_t *cb, ptl_process_id_t pid, + ptl_ni_limits_t *desired_limits, + ptl_ni_limits_t *actual_limits); extern int lib_fini(nal_cb_t * cb); extern void lib_dispatch(nal_cb_t * cb, void *private, int index, void *arg_block, void *ret_block); diff --git a/lnet/include/lnet/lib-p30.h b/lnet/include/lnet/lib-p30.h index 350447e..efa929c 100644 --- a/lnet/include/lnet/lib-p30.h +++ b/lnet/include/lnet/lib-p30.h @@ -194,11 +194,11 @@ lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd) int niov; if ((umd->options & PTL_MD_KIOV) != 0) { - niov = umd->niov; + niov = umd->length; size = offsetof(lib_md_t, md_iov.kiov[niov]); } else { niov = ((umd->options & PTL_MD_IOVEC) != 0) ? 
- umd->niov : 1; + umd->length : 1; size = offsetof(lib_md_t, md_iov.iov[niov]); } @@ -346,8 +346,9 @@ ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal) return (lh_entry (lh, lib_me_t, me_lh)); } -extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size); +extern int lib_init(nal_cb_t *cb, ptl_process_id_t pid, + ptl_ni_limits_t *desired_limits, + ptl_ni_limits_t *actual_limits); extern int lib_fini(nal_cb_t * cb); extern void lib_dispatch(nal_cb_t * cb, void *private, int index, void *arg_block, void *ret_block); diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 40776a6..ef618c7 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -169,7 +169,7 @@ struct lib_eq_t { ptl_size_t size; ptl_event_t *base; int eq_refcount; - int (*event_callback) (ptl_event_t * event); + ptl_eq_handler_t event_callback; void *eq_addrkey; }; @@ -245,15 +245,11 @@ typedef struct { * extracted by masking with (PTL_COOKIE_TYPES - 1) */ typedef struct { - int up; - int refcnt; ptl_nid_t nid; ptl_pid_t pid; - int num_nodes; - unsigned int debug; lib_ptl_t tbl; - lib_ac_t ac; lib_counters_t counters; + ptl_ni_limits_t actual_limits; int ni_lh_hash_size; /* size of lib handle hash table */ struct list_head *ni_lh_hash_table; /* all extant lib handles, this interface */ diff --git a/lnet/include/lnet/lnet.h b/lnet/include/lnet/lnet.h index 577ffab..4b8631d 100644 --- a/lnet/include/lnet/lnet.h +++ b/lnet/include/lnet/lnet.h @@ -21,45 +21,6 @@ #endif #include -#include #include -#include - -/* - * Debugging flags reserved for the Portals reference library. - * These are not part of the API as described in the SAND report - * but are for the use of the maintainers of the reference implementation. - * - * It is not expected that the real implementations will export - * this functionality. 
- */ -#define PTL_DEBUG_NONE 0ul -#define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */ - -#define __bit(x) ((unsigned long) 1<<(x)) -#define PTL_DEBUG_PUT __bit(0) -#define PTL_DEBUG_GET __bit(1) -#define PTL_DEBUG_REPLY __bit(2) -#define PTL_DEBUG_ACK __bit(3) -#define PTL_DEBUG_DROP __bit(4) -#define PTL_DEBUG_REQUEST __bit(5) -#define PTL_DEBUG_DELIVERY __bit(6) -#define PTL_DEBUG_UNLINK __bit(7) -#define PTL_DEBUG_THRESHOLD __bit(8) -#define PTL_DEBUG_API __bit(9) - -/* - * These eight are reserved for the NAL to define - * It should probably give them better names... - */ -#define PTL_DEBUG_NI_ALL (0xF000ul) /* Only the NAL flags */ -#define PTL_DEBUG_NI0 __bit(24) -#define PTL_DEBUG_NI1 __bit(25) -#define PTL_DEBUG_NI2 __bit(26) -#define PTL_DEBUG_NI3 __bit(27) -#define PTL_DEBUG_NI4 __bit(28) -#define PTL_DEBUG_NI5 __bit(29) -#define PTL_DEBUG_NI6 __bit(30) -#define PTL_DEBUG_NI7 __bit(31) #endif diff --git a/lnet/include/lnet/lnetctl.h b/lnet/include/lnet/lnetctl.h index 12ef47a..2af336e 100644 --- a/lnet/include/lnet/lnetctl.h +++ b/lnet/include/lnet/lnetctl.h @@ -23,6 +23,10 @@ #ifndef _PTLCTL_H_ #define _PTLCTL_H_ +#include +#include +#include + #define PORTALS_DEV_ID 0 #define PORTALS_DEV_PATH "/dev/portals" #define OBD_DEV_ID 1 @@ -76,13 +80,15 @@ int jt_dbg_panic(int argc, char **argv); int ptl_set_cfg_record_cb(cfg_record_cb_t cb); /* l_ioctl.c */ -typedef int (ioc_handler_t)(int dev_id, int opc, void *buf); +typedef int (ioc_handler_t)(int dev_id, unsigned int opc, void *buf); void set_ioc_handler(ioc_handler_t *handler); int register_ioc_dev(int dev_id, const char * dev_name); void unregister_ioc_dev(int dev_id); int set_ioctl_dump(char * file); -int l_ioctl(int dev_id, int opc, void *buf); -int parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *)); +int l_ioctl(int dev_id, unsigned int opc, void *buf); +int parse_dump(char * dump_file, ioc_handler_t ioc_func); int jt_ioc_dump(int argc, char **argv); +extern char 
*dump_filename; +int dump(int dev_id, unsigned int opc, void *buf); #endif diff --git a/lnet/include/lnet/nal.h b/lnet/include/lnet/nal.h index 5b72046..1f925c1 100644 --- a/lnet/include/lnet/nal.h +++ b/lnet/include/lnet/nal.h @@ -18,32 +18,29 @@ typedef struct nal_t nal_t; struct nal_t { - ptl_ni_t ni; - int refct; - void *nal_data; - int *timeout; /* for libp30api users */ - int (*forward) (nal_t * nal, int index, /* Function ID */ - void *args, size_t arg_len, void *ret, size_t ret_len); + int nal_refct; + void *nal_data; - int (*shutdown) (nal_t * nal, int interface); + int (*startup) (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *req, ptl_ni_limits_t *actual); + + void (*shutdown) (nal_t *nal); - int (*validate) (nal_t * nal, void *base, size_t extent); + int (*forward) (nal_t *nal, int index, /* Function ID */ + void *args, size_t arg_len, void *ret, size_t ret_len); - int (*yield) (nal_t * nal, unsigned long *flags, int milliseconds); + int (*yield) (nal_t *nal, unsigned long *flags, int milliseconds); - void (*lock) (nal_t * nal, unsigned long *flags); + void (*lock) (nal_t *nal, unsigned long *flags); - void (*unlock) (nal_t * nal, unsigned long *flags); + void (*unlock) (nal_t *nal, unsigned long *flags); }; -typedef nal_t *(ptl_interface_t) (int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); -extern nal_t *PTL_IFACE_IP(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); -extern nal_t *PTL_IFACE_MYR(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); - extern nal_t *ptl_hndl2nal(ptl_handle_any_t * any); -#ifndef PTL_IFACE_DEFAULT -#define PTL_IFACE_DEFAULT (PTL_IFACE_IP) +#ifdef __KERNEL__ +extern int ptl_register_nal(ptl_interface_t interface, nal_t *nal); +extern void ptl_unregister_nal(ptl_interface_t interface); #endif #endif diff --git a/lnet/include/lnet/nalids.h b/lnet/include/lnet/nalids.h index 1568593..55a991b 100644 --- a/lnet/include/lnet/nalids.h +++ b/lnet/include/lnet/nalids.h @@ -1,6 +1,2 @@ 
#include "build_check.h" -#define PTL_IFACE_TCP 1 -#define PTL_IFACE_ER 2 -#define PTL_IFACE_SS 3 -#define PTL_IFACE_MAX 4 diff --git a/lnet/include/lnet/p30.h b/lnet/include/lnet/p30.h index 577ffab..4b8631d 100644 --- a/lnet/include/lnet/p30.h +++ b/lnet/include/lnet/p30.h @@ -21,45 +21,6 @@ #endif #include -#include #include -#include - -/* - * Debugging flags reserved for the Portals reference library. - * These are not part of the API as described in the SAND report - * but are for the use of the maintainers of the reference implementation. - * - * It is not expected that the real implementations will export - * this functionality. - */ -#define PTL_DEBUG_NONE 0ul -#define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */ - -#define __bit(x) ((unsigned long) 1<<(x)) -#define PTL_DEBUG_PUT __bit(0) -#define PTL_DEBUG_GET __bit(1) -#define PTL_DEBUG_REPLY __bit(2) -#define PTL_DEBUG_ACK __bit(3) -#define PTL_DEBUG_DROP __bit(4) -#define PTL_DEBUG_REQUEST __bit(5) -#define PTL_DEBUG_DELIVERY __bit(6) -#define PTL_DEBUG_UNLINK __bit(7) -#define PTL_DEBUG_THRESHOLD __bit(8) -#define PTL_DEBUG_API __bit(9) - -/* - * These eight are reserved for the NAL to define - * It should probably give them better names... 
- */ -#define PTL_DEBUG_NI_ALL (0xF000ul) /* Only the NAL flags */ -#define PTL_DEBUG_NI0 __bit(24) -#define PTL_DEBUG_NI1 __bit(25) -#define PTL_DEBUG_NI2 __bit(26) -#define PTL_DEBUG_NI3 __bit(27) -#define PTL_DEBUG_NI4 __bit(28) -#define PTL_DEBUG_NI5 __bit(29) -#define PTL_DEBUG_NI6 __bit(30) -#define PTL_DEBUG_NI7 __bit(31) #endif diff --git a/lnet/include/lnet/ptlctl.h b/lnet/include/lnet/ptlctl.h index 12ef47a..2af336e 100644 --- a/lnet/include/lnet/ptlctl.h +++ b/lnet/include/lnet/ptlctl.h @@ -23,6 +23,10 @@ #ifndef _PTLCTL_H_ #define _PTLCTL_H_ +#include +#include +#include + #define PORTALS_DEV_ID 0 #define PORTALS_DEV_PATH "/dev/portals" #define OBD_DEV_ID 1 @@ -76,13 +80,15 @@ int jt_dbg_panic(int argc, char **argv); int ptl_set_cfg_record_cb(cfg_record_cb_t cb); /* l_ioctl.c */ -typedef int (ioc_handler_t)(int dev_id, int opc, void *buf); +typedef int (ioc_handler_t)(int dev_id, unsigned int opc, void *buf); void set_ioc_handler(ioc_handler_t *handler); int register_ioc_dev(int dev_id, const char * dev_name); void unregister_ioc_dev(int dev_id); int set_ioctl_dump(char * file); -int l_ioctl(int dev_id, int opc, void *buf); -int parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *)); +int l_ioctl(int dev_id, unsigned int opc, void *buf); +int parse_dump(char * dump_file, ioc_handler_t ioc_func); int jt_ioc_dump(int argc, char **argv); +extern char *dump_filename; +int dump(int dev_id, unsigned int opc, void *buf); #endif diff --git a/lnet/include/lnet/types.h b/lnet/include/lnet/types.h index 902db76..ef2712b 100644 --- a/lnet/include/lnet/types.h +++ b/lnet/include/lnet/types.h @@ -3,28 +3,7 @@ #include "build_check.h" -#ifdef __linux__ -# include -# if defined(__powerpc__) && !defined(__KERNEL__) -# define __KERNEL__ -# include -# undef __KERNEL__ -# else -# include -# endif -#else -# include -typedef u_int32_t __u32; -typedef u_int64_t __u64; -#endif - -#ifdef __KERNEL__ -# include -#else -# include -# define do_gettimeofday(tv) 
gettimeofday(tv, NULL); -#endif - +#include #include /* This implementation uses the same type for API function return codes and @@ -41,7 +20,6 @@ typedef __u64 ptl_hdr_data_t; typedef __u32 ptl_size_t; #define PTL_TIME_FOREVER (-1) -#define PTL_EQ_HANDLER_NONE NULL typedef struct { unsigned long nal_idx; /* which network interface */ @@ -81,12 +59,6 @@ typedef enum { } ptl_ins_pos_t; typedef struct { - struct page *kiov_page; - unsigned int kiov_len; - unsigned int kiov_offset; -} ptl_kiov_t; - -typedef struct { void *start; ptl_size_t length; int threshold; @@ -94,7 +66,6 @@ typedef struct { unsigned int options; void *user_ptr; ptl_handle_eq_t eventq; - unsigned int niov; } ptl_md_t; /* Options for the MD structure */ @@ -112,9 +83,19 @@ typedef struct { /* For compatibility with Cray Portals */ #define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0 +#define PTL_MD_PHYS 0 #define PTL_MD_THRESH_INF (-1) +/* NB lustre portals uses struct iovec internally! */ +typedef struct iovec ptl_md_iovec_t; + +typedef struct { + struct page *kiov_page; + unsigned int kiov_len; + unsigned int kiov_offset; +} ptl_kiov_t; + typedef enum { PTL_EVENT_GET_START, PTL_EVENT_GET_END, @@ -168,6 +149,9 @@ typedef enum { PTL_NOACK_REQ } ptl_ack_req_t; +typedef void (*ptl_eq_handler_t)(ptl_event_t *event); +#define PTL_EQ_HANDLER_NONE NULL + typedef struct { volatile ptl_seq_t sequence; ptl_size_t size; @@ -180,11 +164,14 @@ typedef struct { } ptl_ni_t; typedef struct { - int max_match_entries; /* max number of match entries */ - int max_mem_descriptors; /* max number of memory descriptors */ - int max_event_queues; /* max number of event queues */ - int max_atable_index; /* maximum access control list table index */ - int max_ptable_index; /* maximum portals table index */ + int max_mes; + int max_mds; + int max_eqs; + int max_ac_index; + int max_pt_index; + int max_md_iovecs; + int max_me_list; + int max_getput_md; } ptl_ni_limits_t; /* @@ -202,4 +189,7 @@ typedef enum { typedef int 
ptl_sr_value_t; +typedef int ptl_interface_t; +#define PTL_IFACE_DEFAULT (-1) + #endif diff --git a/lnet/klnds/.cvsignore b/lnet/klnds/.cvsignore index 89a4aa6..f5fd0b0 100644 --- a/lnet/klnds/.cvsignore +++ b/lnet/klnds/.cvsignore @@ -1,3 +1,5 @@ Makefile -Makefile.in -.*.o.cmd +autoMakefile +autoMakefile.in +.*.cmd +.depend diff --git a/lnet/klnds/Makefile.in b/lnet/klnds/Makefile.in new file mode 100644 index 0000000..b5ed168 --- /dev/null +++ b/lnet/klnds/Makefile.in @@ -0,0 +1,6 @@ +@BUILD_GMNAL_TRUE@subdir-m += gmnal +@BUILD_IBNAL_TRUE@subdir-m += ibnal +@BUILD_QSWNAL_TRUE@subdir-m += qswnal +subdir-m += socknal + +@INCLUDE_RULES@ diff --git a/lnet/include/Makefile.am b/lnet/klnds/autoMakefile.am similarity index 66% rename from lnet/include/Makefile.am rename to lnet/klnds/autoMakefile.am index 2cf7f99..9d04a46 100644 --- a/lnet/include/Makefile.am +++ b/lnet/klnds/autoMakefile.am @@ -3,6 +3,4 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution -SUBDIRS = portals linux -EXTRA_DIST = config.h.in -include $(top_srcdir)/Rules +SUBDIRS = gmnal ibnal qswnal socknal diff --git a/lnet/klnds/gmlnd/.cvsignore b/lnet/klnds/gmlnd/.cvsignore index e995588..642e2e6 100644 --- a/lnet/klnds/gmlnd/.cvsignore +++ b/lnet/klnds/gmlnd/.cvsignore @@ -1,3 +1,10 @@ .deps Makefile -Makefile.in +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.cmd +.*.flags +.tmp_versions +.depend diff --git a/lnet/klnds/gmlnd/Makefile.am b/lnet/klnds/gmlnd/Makefile.am deleted file mode 100644 index bac4680..0000000 --- a/lnet/klnds/gmlnd/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = kgmnal -modulenet_DATA = kgmnal.o -EXTRA_PROGRAMS = kgmnal - -DEFS = -DGM_KERNEL -kgmnal_SOURCES = gmnal.h gmnal_api.c gmnal_cb.c gmnal_comm.c gmnal_utils.c gmnal_module.c diff --git a/lnet/klnds/gmlnd/Makefile.in b/lnet/klnds/gmlnd/Makefile.in new file mode 100644 index 0000000..89ea361 --- /dev/null +++ b/lnet/klnds/gmlnd/Makefile.in @@ -0,0 +1,6 @@ +MODULES := kgmnal +kgmnal-objs := gmnal_api.o gmnal_cb.o gmnal_comm.o gmnal_utils.o gmnal_module.o + +EXTRA_PRE_CFLAGS := @GMCPPFLAGS@ + +@INCLUDE_RULES@ diff --git a/lnet/klnds/gmlnd/autoMakefile.am b/lnet/klnds/gmlnd/autoMakefile.am new file mode 100644 index 0000000..d8b9edb --- /dev/null +++ b/lnet/klnds/gmlnd/autoMakefile.am @@ -0,0 +1,15 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +if MODULES +if BUILD_GMNAL +if !CRAY_PORTALS +modulenet_DATA = kgmnal$(KMODEXT) +endif +endif +endif + +MOSTLYCLEANFILES = *.o *.ko *.mod.c +DIST_SOURCES = $(kgmnal-objs:%.o=%.c) gmnal.h diff --git a/lnet/klnds/gmlnd/gmlnd.h b/lnet/klnds/gmlnd/gmlnd.h index 9955599..e48552e 100644 --- a/lnet/klnds/gmlnd/gmlnd.h +++ b/lnet/klnds/gmlnd/gmlnd.h @@ -190,7 +190,6 @@ typedef struct _gmnal_rxtwe { #define NRXTHREADS 10 /* max number of receiver threads */ typedef struct _gmnal_data_t { - int refcnt; spinlock_t cb_lock; spinlock_t stxd_lock; struct semaphore stxd_token; @@ -309,9 +308,12 @@ extern gmnal_data_t *global_nal_data; /* * API NAL */ +int gmnal_api_startup(nal_t *, ptl_pid_t, + ptl_ni_limits_t *, ptl_ni_limits_t *); + int gmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t); -int gmnal_api_shutdown(nal_t *, int); +void gmnal_api_shutdown(nal_t *); int gmnal_api_validate(nal_t *, void *, size_t); @@ -323,14 +325,13 @@ void gmnal_api_unlock(nal_t *, unsigned long *); #define GMNAL_INIT_NAL(a) do { \ + a->startup = 
gmnal_api_startup; \ a->forward = gmnal_api_forward; \ a->shutdown = gmnal_api_shutdown; \ - a->validate = NULL; \ a->yield = gmnal_api_yield; \ a->lock = gmnal_api_lock; \ a->unlock = gmnal_api_unlock; \ a->timeout = NULL; \ - a->refct = 1; \ a->nal_data = NULL; \ } while (0) @@ -373,7 +374,7 @@ void gmnal_cb_sti(nal_cb_t *, unsigned long *); int gmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *); -nal_t *gmnal_init(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t rpid); +int gmnal_init(void); void gmnal_fini(void); diff --git a/lnet/klnds/gmlnd/gmlnd_api.c b/lnet/klnds/gmlnd/gmlnd_api.c index 338d75c..7c94f93 100644 --- a/lnet/klnds/gmlnd/gmlnd_api.c +++ b/lnet/klnds/gmlnd/gmlnd_api.c @@ -123,18 +123,51 @@ gmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len, /* * gmnal_api_shutdown + * nal_refct == 0 => called on last matching PtlNIFini() * Close down this interface and free any resources associated with it * nal_t nal our nal to shutdown */ -int +void -gmnal_api_shutdown(nal_t *nal, int interface) +gmnal_api_shutdown(nal_t *nal) { + gmnal_data_t *nal_data = nal->nal_data; /* set before the CDEBUG below prints it */ + nal_cb_t *nal_cb; - gmnal_data_t *nal_data = nal->nal_data; - + if (nal->nal_refct != 0) + return; + CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data); - return(PTL_OK); + LASSERT(nal == global_nal_data->nal); + nal_data = nal->nal_data; + LASSERT(nal_data == global_nal_data); + nal_cb = nal_data->nal_cb; + + /* Stop portals calling our ioctl handler */ + libcfs_nal_cmd_unregister(GMNAL); + + /* XXX for shutdown "under fire" we probably need to set a shutdown + * flag so when lib calls us we fail immediately and dont queue any + * more work but our threads can still call into lib OK.
THEN + * shutdown our threads, THEN lib_fini() */ + lib_fini(nal_cb); + + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + gmnal_free_txd(nal_data); + gmnal_free_srxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + if (nal_data->sysctl) + unregister_sysctl_table (nal_data->sysctl); + /* nal is the static the_gm_nal registered by gmnal_init(): it must not be freed */ + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + + global_nal_data = NULL; + PORTAL_MODULE_UNUSE; } @@ -210,57 +243,54 @@ gmnal_api_unlock(nal_t *nal, unsigned long *flags) } -nal_t * -gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, - ptl_pid_t rpid) +int +gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { - nal_t *nal = NULL; nal_cb_t *nal_cb = NULL; gmnal_data_t *nal_data = NULL; gmnal_srxd_t *srxd = NULL; gm_status_t gm_status; unsigned int local_nid = 0, global_nid = 0; - ptl_nid_t portals_nid; - ptl_pid_t portals_pid = 0; + ptl_process_id_t process_id; + + if (nal->nal_refct != 0) { + nal_data = (gmnal_data_t *)nal->nal_data; + nal_cb = nal_data->nal_cb; + if (actual_limits != NULL) + *actual_limits = nal_cb->ni.actual_limits; + return (PTL_OK); + } + /* Called on first PtlNIInit() */ - CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], " - "ac_size[%d]\n", interface, ptl_size, ac_size); + CDEBUG(D_TRACE, "startup\n"); + LASSERT(global_nal_data == NULL); PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t)); if (!nal_data) { CDEBUG(D_ERROR, "can't get memory\n"); - return(NULL); + return(PTL_NO_SPACE); } memset(nal_data, 0, sizeof(gmnal_data_t)); /* * set the small message buffer size */ - nal_data->refcnt = 1; CDEBUG(D_INFO, "Allocd and reset nal_data[%p]\n", nal_data); CDEBUG(D_INFO, "small_msg_size is [%d]\n", nal_data->small_msg_size); - PORTAL_ALLOC(nal, sizeof(nal_t)); 
- if (!nal) { - PORTAL_FREE(nal_data, 
sizeof(gmnal_data_t)); - return(NULL); - } - memset(nal, 0, sizeof(nal_t)); - CDEBUG(D_INFO, "Allocd and reset nal[%p]\n", nal); - PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t)); if (!nal_cb) { - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - return(NULL); + return(PTL_NO_SPACE); } memset(nal_cb, 0, sizeof(nal_cb_t)); CDEBUG(D_INFO, "Allocd and reset nal_cb[%p]\n", nal_cb); - GMNAL_INIT_NAL(nal); GMNAL_INIT_NAL_CB(nal_cb); /* * String them all together @@ -280,10 +310,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, CDEBUG(D_INFO, "Calling gm_init\n"); if (gm_init() != GM_SUCCESS) { CDEBUG(D_ERROR, "call to gm_init failed\n"); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } @@ -326,10 +355,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, GMNAL_GM_LOCK(nal_data); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } @@ -344,10 +372,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } @@ -374,10 +401,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } gmnal_start_kernel_threads(nal_data); @@ -407,10 +433,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); 
GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } nal_data->gm_local_nid = local_nid; CDEBUG(D_INFO, "Local node id is [%u]\n", local_nid); @@ -428,10 +453,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid); nal_data->gm_global_nid = global_nid; @@ -440,13 +464,15 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, /* pid = gm_getpid(); */ - CDEBUG(D_INFO, "portals_pid is [%u]\n", portals_pid); - portals_nid = (unsigned long)global_nid; - CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", portals_nid); + process_id.pid = 0; + process_id.nid = global_nid; + + CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid); + CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", process_id.nid); CDEBUG(D_PORTALS, "calling lib_init\n"); - if (lib_init(nal_cb, portals_nid, portals_pid, 1024, ptl_size, - ac_size) != PTL_OK) { + if (lib_init(nal_cb, process_id, + requested_limits, actual_limits) != PTL_OK) { CDEBUG(D_ERROR, "lib_init failed\n"); gmnal_stop_rxthread(nal_data); gmnal_stop_ctthread(nal_data); @@ -456,22 +482,68 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, gm_close(nal_data->gm_port); gm_finalize(); GMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); + return(PTL_FAIL); } + + if (libcfs_nal_cmd_register(GMNAL, &gmnal_cmd, nal->nal_data) != 0) { + CDEBUG(D_INFO, "libcfs_nal_cmd_register failed\n"); + + /* XXX these cleanup cases should be restructured to + * minimise 
duplication... */ + lib_fini(nal_cb); + + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + gmnal_free_txd(nal_data); + gmnal_free_srxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(PTL_FAIL); + } + + /* might be better to initialise this at module load rather than in + * NAL startup */ nal_data->sysctl = NULL; nal_data->sysctl = register_sysctl_table (gmnalnal_top_sysctl_table, 0); CDEBUG(D_INFO, "gmnal_init finished\n"); global_nal_data = nal->nal_data; - return(nal); + + /* no unload now until shutdown */ + PORTAL_MODULE_USE; + + return(PTL_OK); } +nal_t the_gm_nal; + +/* + * Called when module loaded + */ +int gmnal_init(void) +{ + int rc; + + memset(&the_gm_nal, 0, sizeof(nal_t)); + CDEBUG(D_INFO, "reset nal[%p]\n", &the_gm_nal); + GMNAL_INIT_NAL(&the_gm_nal); + rc = ptl_register_nal(GMNAL, &the_gm_nal); + if (rc != PTL_OK) + CERROR("Can't register GMNAL: %d\n", rc); + + return (rc); +} + + /* * Called when module removed @@ -484,20 +556,7 @@ void gmnal_fini() CDEBUG(D_TRACE, "gmnal_fini\n"); - PtlNIFini(kgmnal_ni); - lib_fini(nal_cb); + LASSERT(global_nal_data == NULL); - gmnal_stop_rxthread(nal_data); - gmnal_stop_ctthread(nal_data); - gmnal_free_txd(nal_data); - gmnal_free_srxd(nal_data); - GMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - GMNAL_GM_UNLOCK(nal_data); - if (nal_data->sysctl) - unregister_sysctl_table (nal_data->sysctl); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + ptl_unregister_nal(GMNAL); } diff --git a/lnet/klnds/gmlnd/gmlnd_module.c b/lnet/klnds/gmlnd/gmlnd_module.c index 31f6819..278230e 100644 --- a/lnet/klnds/gmlnd/gmlnd_module.c +++ b/lnet/klnds/gmlnd/gmlnd_module.c @@ -32,9 +32,6 @@ int num_rx_threads = -1; int num_stxds = 5; int gm_port = 4; 
-ptl_handle_ni_t kgmnal_ni; - - int gmnal_cmd(struct portals_cfg *pcfg, void *private) { @@ -92,26 +89,15 @@ gmnal_load(void) CDEBUG(D_INFO, "Calling gmnal_init\n"); - status = PtlNIInit(gmnal_init, 32, 4, 0, &kgmnal_ni); + status = gmnal_init(); if (status == PTL_OK) { - CDEBUG(D_INFO, "Portals GMNAL initialised ok kgmnal_ni\n"); + CDEBUG(D_INFO, "Portals GMNAL initialised ok\n"); } else { CDEBUG(D_INFO, "Portals GMNAL Failed to initialise\n"); - return(1); + return(-ENODEV); } - CDEBUG(D_INFO, "Calling kportal_nal_register\n"); - /* - * global_nal_data is set by gmnal_init - */ - if (kportal_nal_register(GMNAL, &gmnal_cmd, global_nal_data) != 0) { - CDEBUG(D_INFO, "kportal_nal_register failed\n"); - return(1); - } - - CDEBUG(D_INFO, "Calling PORTAL_SYMBOL_REGISTER\n"); - PORTAL_SYMBOL_REGISTER(kgmnal_ni); CDEBUG(D_INFO, "This is the end of the gmnal init routine"); @@ -122,11 +108,7 @@ gmnal_load(void) static void __exit gmnal_unload(void) { - - kportal_nal_unregister(GMNAL); - PORTAL_SYMBOL_UNREGISTER(kgmnal_ni); gmnal_fini(); - global_nal_data = NULL; return; } @@ -135,8 +117,6 @@ module_init(gmnal_load); module_exit(gmnal_unload); -EXPORT_SYMBOL(kgmnal_ni); - MODULE_PARM(gmnal_small_msg_size, "i"); MODULE_PARM(num_rx_threads, "i"); MODULE_PARM(num_stxds, "i"); diff --git a/lnet/klnds/iblnd/.cvsignore b/lnet/klnds/iblnd/.cvsignore index e995588..48b17e9 100644 --- a/lnet/klnds/iblnd/.cvsignore +++ b/lnet/klnds/iblnd/.cvsignore @@ -1,3 +1,10 @@ .deps Makefile -Makefile.in +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.*.cmd +.tmp_versions +.depend diff --git a/lnet/klnds/iblnd/Makefile.am b/lnet/klnds/iblnd/Makefile.am deleted file mode 100644 index 84818dc..0000000 --- a/lnet/klnds/iblnd/Makefile.am +++ /dev/null @@ -1,10 +0,0 @@ -include ../../Rules.linux - -MODULE = kibnal -modulenet_DATA = kibnal.o -EXTRA_PROGRAMS = kibnal - - -DEFS = -CPPFLAGS=@CPPFLAGS@ @with_ib@ -kibnal_SOURCES = ibnal.h ibnal.c ibnal_cb.c diff --git
a/lnet/klnds/iblnd/Makefile.in b/lnet/klnds/iblnd/Makefile.in new file mode 100644 index 0000000..e180b3e --- /dev/null +++ b/lnet/klnds/iblnd/Makefile.in @@ -0,0 +1,6 @@ +MODULES := kibnal +kibnal-objs := ibnal.o ibnal_cb.o + +EXTRA_PRE_CFLAGS := @IBCPPFLAGS@ + +@INCLUDE_RULES@ diff --git a/lnet/klnds/iblnd/autoMakefile.am b/lnet/klnds/iblnd/autoMakefile.am new file mode 100644 index 0000000..ffe084c --- /dev/null +++ b/lnet/klnds/iblnd/autoMakefile.am @@ -0,0 +1,10 @@ +if MODULES +if !CRAY_PORTALS +if BUILD_IBNAL +modulenet_DATA = kibnal$(KMODEXT) +endif +endif +endif + +MOSTLYCLEANFILES = *.o *.ko *.mod.c +DIST_SOURCES = $(kibnal-objs:%.o=%.c) ibnal.h diff --git a/lnet/klnds/iblnd/ibnal.c b/lnet/klnds/iblnd/ibnal.c index 02beca7..86c2a63 100644 --- a/lnet/klnds/iblnd/ibnal.c +++ b/lnet/klnds/iblnd/ibnal.c @@ -235,11 +235,6 @@ kibnal_init(int interface, // no use here kibnal_data_t *nal_data = NULL; int rc; - unsigned int nnids = 1; // number of nids - // do we know how many nodes are in this - // system related to this kib_nid - // - CDEBUG(D_NET, "kibnal_init:calling lib_init with nid 0x%u\n", kibnal_data.kib_nid); @@ -252,7 +247,6 @@ kibnal_init(int interface, // no use here rc = lib_init(&kibnal_lib, kibnal_data.kib_nid, 0, // process id is set as 0 - nnids, ptl_size, ac_size); @@ -2034,16 +2028,13 @@ kibnal_initialize(void) CDEBUG(D_PORTALS, "kibnal_initialize: Enter kibnal_initialize\n"); // set api functional pointers + kibnal_api.startup = kibnal_startup; kibnal_api.forward = kibnal_forward; kibnal_api.shutdown = kibnal_shutdown; kibnal_api.yield = kibnal_yield; - kibnal_api.validate = NULL; /* our api validate is a NOOP */ kibnal_api.lock = kibnal_lock; kibnal_api.unlock = kibnal_unlock; kibnal_api.nal_data = &kibnal_data; // this is so called private data - kibnal_api.refct = 1; - kibnal_api.timeout = NULL; - kibnal_lib.nal_data = &kibnal_data; memset(&kibnal_data, 0, sizeof(kibnal_data)); diff --git a/lnet/klnds/qswlnd/.cvsignore 
b/lnet/klnds/qswlnd/.cvsignore index e995588..48b17e9 100644 --- a/lnet/klnds/qswlnd/.cvsignore +++ b/lnet/klnds/qswlnd/.cvsignore @@ -1,3 +1,10 @@ .deps Makefile -Makefile.in +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.*.cmd +.tmp_versions +.depend diff --git a/lnet/klnds/qswlnd/Makefile.am b/lnet/klnds/qswlnd/Makefile.am deleted file mode 100644 index 3eb4dd5..0000000 --- a/lnet/klnds/qswlnd/Makefile.am +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = kqswnal -modulenet_DATA = kqswnal.o -EXTRA_PROGRAMS = kqswnal - - -#CFLAGS:= @KCFLAGS@ -#CPPFLAGS:=@KCPPFLAGS@ -DEFS = -CPPFLAGS=@CPPFLAGS@ @with_quadrics@ -kqswnal_SOURCES = qswnal.c qswnal_cb.c qswnal.h diff --git a/lnet/klnds/qswlnd/Makefile.in b/lnet/klnds/qswlnd/Makefile.in new file mode 100644 index 0000000..d27240c --- /dev/null +++ b/lnet/klnds/qswlnd/Makefile.in @@ -0,0 +1,6 @@ +MODULES := kqswnal +kqswnal-objs := qswnal.o qswnal_cb.o + +EXTRA_POST_CFLAGS := @QSWCPPFLAGS@ -I/usr/include + +@INCLUDE_RULES@ diff --git a/lnet/klnds/qswlnd/autoMakefile.am b/lnet/klnds/qswlnd/autoMakefile.am new file mode 100644 index 0000000..b5b2e07 --- /dev/null +++ b/lnet/klnds/qswlnd/autoMakefile.am @@ -0,0 +1,15 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. 
+# See the file COPYING in this distribution + +if MODULES +if !CRAY_PORTALS +if BUILD_QSWNAL +modulenet_DATA = kqswnal$(KMODEXT) +endif +endif +endif + +MOSTLYCLEANFILES = *.o *.ko *.mod.c +DIST_SOURCES = $(kqswnal-objs:%.o=%.c) qswnal.h diff --git a/lnet/klnds/qswlnd/qswlnd.c b/lnet/klnds/qswlnd/qswlnd.c index a386eef..f4005de 100644 --- a/lnet/klnds/qswlnd/qswlnd.c +++ b/lnet/klnds/qswlnd/qswlnd.c @@ -24,9 +24,10 @@ #include "qswnal.h" -ptl_handle_ni_t kqswnal_ni; nal_t kqswnal_api; kqswnal_data_t kqswnal_data; +ptl_handle_ni_t kqswnal_ni; +kqswnal_tunables_t kqswnal_tunables; kpr_nal_interface_t kqswnal_router_interface = { kprni_nalid: QSWNAL, @@ -43,10 +44,7 @@ kpr_nal_interface_t kqswnal_router_interface = { static ctl_table kqswnal_ctl_table[] = { {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets", - &kqswnal_data.kqn_optimized_gets, sizeof (int), - 0644, NULL, &proc_dointvec}, - {QSWNAL_SYSCTL_COPY_SMALL_FWD, "copy_small_fwd", - &kqswnal_data.kqn_copy_small_fwd, sizeof (int), + &kqswnal_tunables.kqn_optimized_gets, sizeof (int), 0644, NULL, &proc_dointvec}, {0} }; @@ -101,15 +99,6 @@ kqswnal_unlock(nal_t *nal, unsigned long *flags) } static int -kqswnal_shutdown(nal_t *nal, int ni) -{ - CDEBUG (D_NET, "shutdown\n"); - - LASSERT (nal == &kqswnal_api); - return (0); -} - -static int kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds) { /* NB called holding statelock */ @@ -119,7 +108,7 @@ kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds) CDEBUG (D_NET, "yield\n"); if (milliseconds == 0) { - if (current->need_resched) + if (need_resched()) schedule(); return 0; } @@ -148,20 +137,6 @@ kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds) return (milliseconds); } -static nal_t * -kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, - ptl_pid_t requested_pid) -{ - ptl_nid_t mynid = kqswnal_elanid2nid (kqswnal_data.kqn_elanid); - int nnids = kqswnal_data.kqn_nnodes; - - CDEBUG(D_NET, "calling lib_init 
with nid "LPX64" of %d\n", mynid, nnids); - - lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size); - - return (&kqswnal_api); -} - int kqswnal_get_tx_desc (struct portals_cfg *pcfg) { @@ -219,26 +194,32 @@ kqswnal_cmd (struct portals_cfg *pcfg, void *private) } } -void __exit -kqswnal_finalise (void) +static void +kqswnal_shutdown(nal_t *nal) { + unsigned long flags; + int do_lib_fini = 0; + + /* NB The first ref was this module! */ + if (nal->nal_refct != 0) { + PORTAL_MODULE_UNUSE; + return; + } + + CDEBUG (D_NET, "shutdown\n"); + LASSERT (nal == &kqswnal_api); + switch (kqswnal_data.kqn_init) { default: LASSERT (0); case KQN_INIT_ALL: -#if CONFIG_SYSCTL - if (kqswnal_data.kqn_sysctl != NULL) - unregister_sysctl_table (kqswnal_data.kqn_sysctl); -#endif - PORTAL_SYMBOL_UNREGISTER (kqswnal_ni); - kportal_nal_unregister(QSWNAL); + libcfs_nal_cmd_unregister(QSWNAL); /* fall through */ - case KQN_INIT_PTL: - PtlNIFini (kqswnal_ni); - lib_fini (&kqswnal_lib); + case KQN_INIT_LIB: + do_lib_fini = 1; /* fall through */ case KQN_INIT_DATA: @@ -249,18 +230,24 @@ kqswnal_finalise (void) } /**********************************************************************/ - /* Make router stop her calling me and fail any more call-ins */ + /* Tell router we're shutting down. Any router calls my threads + * make will now fail immediately and the router will stop calling + * into me. */ kpr_shutdown (&kqswnal_data.kqn_router); - + /**********************************************************************/ - /* flag threads we've started to terminate and wait for all to ack */ - + /* Signal the start of shutdown... 
*/ + spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags); kqswnal_data.kqn_shuttingdown = 1; - wake_up_all (&kqswnal_data.kqn_sched_waitq); + spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags); + + wake_up_all(&kqswnal_data.kqn_idletxd_waitq); - while (atomic_read (&kqswnal_data.kqn_nthreads_running) != 0) { - CDEBUG(D_NET, "waiting for %d threads to start shutting down\n", - atomic_read (&kqswnal_data.kqn_nthreads_running)); + /**********************************************************************/ + /* wait for sends that have allocated a tx desc to launch or give up */ + while (atomic_read (&kqswnal_data.kqn_pending_txs) != 0) { + CDEBUG(D_NET, "waiting for %d pending sends\n", + atomic_read (&kqswnal_data.kqn_pending_txs)); set_current_state (TASK_UNINTERRUPTIBLE); schedule_timeout (HZ); } @@ -268,18 +255,27 @@ kqswnal_finalise (void) /**********************************************************************/ /* close elan comms */ #if MULTIRAIL_EKC + /* Shut down receivers first; rx callbacks might try sending... */ if (kqswnal_data.kqn_eprx_small != NULL) ep_free_rcvr (kqswnal_data.kqn_eprx_small); if (kqswnal_data.kqn_eprx_large != NULL) ep_free_rcvr (kqswnal_data.kqn_eprx_large); + /* NB ep_free_rcvr() returns only after we've freed off all receive + * buffers (see shutdown handling in kqswnal_requeue_rx()). This + * means we must have completed any messages we passed to + * lib_parse() or kpr_fwd_start(). */ + if (kqswnal_data.kqn_eptx != NULL) ep_free_xmtr (kqswnal_data.kqn_eptx); - /* freeing the xmtr completes all txs pdq */ + /* NB ep_free_xmtr() returns only after all outstanding transmits + * have called their callback... 
*/ LASSERT(list_empty(&kqswnal_data.kqn_activetxds)); #else + /* "Old" EKC just pretends to shutdown cleanly but actually + * provides no guarantees */ if (kqswnal_data.kqn_eprx_small != NULL) ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small); @@ -298,7 +294,6 @@ kqswnal_finalise (void) #endif /**********************************************************************/ /* flag threads to terminate, wake them and wait for them to die */ - kqswnal_data.kqn_shuttingdown = 2; wake_up_all (&kqswnal_data.kqn_sched_waitq); @@ -316,10 +311,12 @@ kqswnal_finalise (void) #if MULTIRAIL_EKC LASSERT (list_empty (&kqswnal_data.kqn_readyrxds)); + LASSERT (list_empty (&kqswnal_data.kqn_delayedtxds)); + LASSERT (list_empty (&kqswnal_data.kqn_delayedfwds)); #endif /**********************************************************************/ - /* Complete any blocked forwarding packets with error + /* Complete any blocked forwarding packets, with error */ while (!list_empty (&kqswnal_data.kqn_idletxd_fwdq)) @@ -327,23 +324,16 @@ kqswnal_finalise (void) kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next, kpr_fwd_desc_t, kprfd_list); list_del (&fwd->kprfd_list); - kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH); - } - - while (!list_empty (&kqswnal_data.kqn_delayedfwds)) - { - kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_delayedfwds.next, - kpr_fwd_desc_t, kprfd_list); - list_del (&fwd->kprfd_list); - kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH); + kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -ESHUTDOWN); } /**********************************************************************/ - /* Wait for router to complete any packets I sent her - */ + /* finalise router and portals lib */ kpr_deregister (&kqswnal_data.kqn_router); + if (do_lib_fini) + lib_fini (&kqswnal_lib); /**********************************************************************/ /* Unmap message buffers and free all descriptors and buffers @@ -464,7 +454,9 @@ kqswnal_finalise (void) } 
static int __init -kqswnal_initialise (void) +kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { #if MULTIRAIL_EKC EP_RAILMASK all_rails = EP_RAILMASK_ALL; @@ -474,22 +466,21 @@ kqswnal_initialise (void) int rc; int i; int elan_page_idx; + ptl_process_id_t my_process_id; int pkmem = atomic_read(&portal_kmemory); + if (nal->nal_refct != 0) { + if (actual_limits != NULL) + *actual_limits = kqswnal_lib.ni.actual_limits; + /* This module got the first ref */ + PORTAL_MODULE_USE; + return (PTL_OK); + } + LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING); CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory)); - kqswnal_api.forward = kqswnal_forward; - kqswnal_api.shutdown = kqswnal_shutdown; - kqswnal_api.yield = kqswnal_yield; - kqswnal_api.validate = NULL; /* our api validate is a NOOP */ - kqswnal_api.lock = kqswnal_lock; - kqswnal_api.unlock = kqswnal_unlock; - kqswnal_api.nal_data = &kqswnal_data; - - kqswnal_lib.nal_data = &kqswnal_data; - memset(&kqswnal_rpc_success, 0, sizeof(kqswnal_rpc_success)); memset(&kqswnal_rpc_failed, 0, sizeof(kqswnal_rpc_failed)); #if MULTIRAIL_EKC @@ -500,9 +491,6 @@ kqswnal_initialise (void) /* ensure all pointers NULL etc */ memset (&kqswnal_data, 0, sizeof (kqswnal_data)); - kqswnal_data.kqn_optimized_gets = KQSW_OPTIMIZED_GETS; - kqswnal_data.kqn_copy_small_fwd = KQSW_COPY_SMALL_FWD; - kqswnal_data.kqn_cb = &kqswnal_lib; INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds); @@ -524,18 +512,19 @@ kqswnal_initialise (void) /* pointers/lists/locks initialised */ kqswnal_data.kqn_init = KQN_INIT_DATA; - + #if MULTIRAIL_EKC kqswnal_data.kqn_ep = ep_system(); if (kqswnal_data.kqn_ep == NULL) { CERROR("Can't initialise EKC\n"); - return (-ENODEV); + kqswnal_shutdown(&kqswnal_api); + return (PTL_IFACE_INVALID); } if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) { CERROR("Can't get elan ID\n"); - kqswnal_finalise(); - return (-ENODEV); + 
kqswnal_shutdown(&kqswnal_api); + return (PTL_IFACE_INVALID); } #else /**********************************************************************/ @@ -545,7 +534,8 @@ kqswnal_initialise (void) if (kqswnal_data.kqn_ep == NULL) { CERROR ("Can't get elan device 0\n"); - return (-ENODEV); + kqswnal_shutdown(&kqswnal_api); + return (PTL_IFACE_INVALID); } #endif @@ -560,8 +550,8 @@ kqswnal_initialise (void) if (kqswnal_data.kqn_eptx == NULL) { CERROR ("Can't allocate transmitter\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } /**********************************************************************/ @@ -573,8 +563,8 @@ kqswnal_initialise (void) if (kqswnal_data.kqn_eprx_small == NULL) { CERROR ("Can't install small msg receiver\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } kqswnal_data.kqn_eprx_large = ep_alloc_rcvr (kqswnal_data.kqn_ep, @@ -583,8 +573,8 @@ kqswnal_initialise (void) if (kqswnal_data.kqn_eprx_large == NULL) { CERROR ("Can't install large msg receiver\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } /**********************************************************************/ @@ -598,8 +588,8 @@ kqswnal_initialise (void) EP_PERM_WRITE); if (kqswnal_data.kqn_ep_tx_nmh == NULL) { CERROR("Can't reserve tx dma space\n"); - kqswnal_finalise(); - return (-ENOMEM); + kqswnal_shutdown(&kqswnal_api); + return (PTL_NO_SPACE); } #else dmareq.Waitfn = DDI_DMA_SLEEP; @@ -613,8 +603,8 @@ kqswnal_initialise (void) if (rc != DDI_SUCCESS) { CERROR ("Can't reserve rx dma space\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } #endif /**********************************************************************/ @@ -627,8 +617,8 @@ kqswnal_initialise (void) EP_PERM_WRITE); if (kqswnal_data.kqn_ep_tx_nmh == NULL) { CERROR("Can't reserve rx dma 
space\n"); - kqswnal_finalise(); - return (-ENOMEM); + kqswnal_shutdown(&kqswnal_api); + return (PTL_NO_SPACE); } #else dmareq.Waitfn = DDI_DMA_SLEEP; @@ -643,8 +633,8 @@ kqswnal_initialise (void) if (rc != DDI_SUCCESS) { CERROR ("Can't reserve rx dma space\n"); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } #endif /**********************************************************************/ @@ -654,8 +644,8 @@ kqswnal_initialise (void) sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS)); if (kqswnal_data.kqn_txds == NULL) { - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } /* clear flags, null pointers etc */ @@ -670,8 +660,8 @@ kqswnal_initialise (void) PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE); if (ktx->ktx_buffer == NULL) { - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } /* Map pre-allocated buffer NOW, to save latency on transmit */ @@ -707,8 +697,8 @@ kqswnal_initialise (void) sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE)); if (kqswnal_data.kqn_rxds == NULL) { - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */ @@ -742,8 +732,8 @@ kqswnal_initialise (void) struct page *page = alloc_page(GFP_KERNEL); if (page == NULL) { - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_NO_SPACE); } krx->krx_kiov[j].kiov_page = page; @@ -787,15 +777,19 @@ kqswnal_initialise (void) /**********************************************************************/ /* Network interface ready to initialise */ - rc = PtlNIInit(kqswnal_init, 32, 4, 0, &kqswnal_ni); - if (rc != 0) + my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid); + my_process_id.pid = 0; + + rc = lib_init(&kqswnal_lib, my_process_id, + 
requested_limits, actual_limits); + if (rc != PTL_OK) { - CERROR ("PtlNIInit failed %d\n", rc); - kqswnal_finalise (); - return (-ENOMEM); + CERROR ("lib_init failed %d\n", rc); + kqswnal_shutdown (&kqswnal_api); + return (rc); } - kqswnal_data.kqn_init = KQN_INIT_PTL; + kqswnal_data.kqn_init = KQN_INIT_LIB; /**********************************************************************/ /* Queue receives, now that it's OK to run their completion callbacks */ @@ -816,21 +810,20 @@ kqswnal_initialise (void) if (rc != EP_SUCCESS) { CERROR ("failed ep_queue_receive %d\n", rc); - kqswnal_finalise (); - return (-ENOMEM); + kqswnal_shutdown (&kqswnal_api); + return (PTL_FAIL); } } /**********************************************************************/ /* Spawn scheduling threads */ - for (i = 0; i < smp_num_cpus; i++) - { + for (i = 0; i < num_online_cpus(); i++) { rc = kqswnal_thread_start (kqswnal_scheduler, NULL); if (rc != 0) { CERROR ("failed to spawn scheduling thread: %d\n", rc); - kqswnal_finalise (); - return (rc); + kqswnal_shutdown (&kqswnal_api); + return (PTL_FAIL); } } @@ -839,19 +832,13 @@ kqswnal_initialise (void) rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface); CDEBUG(D_NET, "Can't initialise routing interface (rc = %d): not routing\n",rc); - rc = kportal_nal_register (QSWNAL, &kqswnal_cmd, NULL); + rc = libcfs_nal_cmd_register (QSWNAL, &kqswnal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); - kqswnal_finalise (); - return (rc); + kqswnal_shutdown (&kqswnal_api); + return (PTL_FAIL); } -#if CONFIG_SYSCTL - /* Press on regardless even if registering sysctl doesn't work */ - kqswnal_data.kqn_sysctl = register_sysctl_table (kqswnal_top_ctl_table, 0); -#endif - - PORTAL_SYMBOL_REGISTER(kqswnal_ni); kqswnal_data.kqn_init = KQN_INIT_ALL; printk(KERN_INFO "Lustre: Routing QSW NAL loaded on node %d of %d " @@ -860,9 +847,61 @@ kqswnal_initialise (void) kpr_routing (&kqswnal_data.kqn_router) ? 
"enabled" : "disabled", pkmem); - return (0); + return (PTL_OK); +} + +void __exit +kqswnal_finalise (void) +{ +#if CONFIG_SYSCTL + if (kqswnal_tunables.kqn_sysctl != NULL) + unregister_sysctl_table (kqswnal_tunables.kqn_sysctl); +#endif + PtlNIFini(kqswnal_ni); + + ptl_unregister_nal(QSWNAL); } +static int __init +kqswnal_initialise (void) +{ + int rc; + + kqswnal_api.startup = kqswnal_startup; + kqswnal_api.shutdown = kqswnal_shutdown; + kqswnal_api.forward = kqswnal_forward; + kqswnal_api.yield = kqswnal_yield; + kqswnal_api.lock = kqswnal_lock; + kqswnal_api.unlock = kqswnal_unlock; + kqswnal_api.nal_data = &kqswnal_data; + + kqswnal_lib.nal_data = &kqswnal_data; + + /* Initialise dynamic tunables to defaults once only */ + kqswnal_tunables.kqn_optimized_gets = KQSW_OPTIMIZED_GETS; + + rc = ptl_register_nal(QSWNAL, &kqswnal_api); + if (rc != PTL_OK) { + CERROR("Can't register QSWNAL: %d\n", rc); + return (-ENOMEM); /* or something... */ + } + + /* Pure gateways, and the workaround for 'EKC blocks forever until + * the service is active' want the NAL started up at module load + * time... */ + rc = PtlNIInit(QSWNAL, 0, NULL, NULL, &kqswnal_ni); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { + ptl_unregister_nal(QSWNAL); + return (-ENODEV); + } + +#if CONFIG_SYSCTL + /* Press on regardless even if registering sysctl doesn't work */ + kqswnal_tunables.kqn_sysctl = + register_sysctl_table (kqswnal_top_ctl_table, 0); +#endif + return (0); +} MODULE_AUTHOR("Cluster File Systems, Inc. 
"); MODULE_DESCRIPTION("Kernel Quadrics/Elan NAL v1.01"); @@ -870,5 +909,3 @@ MODULE_LICENSE("GPL"); module_init (kqswnal_initialise); module_exit (kqswnal_finalise); - -EXPORT_SYMBOL (kqswnal_ni); diff --git a/lnet/klnds/qswlnd/qswlnd.h b/lnet/klnds/qswlnd/qswlnd.h index 5e32887..6978aa0 100644 --- a/lnet/klnds/qswlnd/qswlnd.h +++ b/lnet/klnds/qswlnd/qswlnd.h @@ -53,7 +53,11 @@ #include #include #include -#include +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#include /* wait_on_buffer */ +#else +#include /* wait_on_buffer */ +#endif #include #include #include @@ -74,6 +78,7 @@ #include #include #include +#include #define KQSW_CHECKSUM 0 #if KQSW_CHECKSUM @@ -194,17 +199,19 @@ typedef struct typedef struct { + /* dynamic tunables... */ + int kqn_optimized_gets; /* optimized GETs? */ +#if CONFIG_SYSCTL + struct ctl_table_header *kqn_sysctl; /* sysctl interface */ +#endif +} kqswnal_tunables_t; + +typedef struct +{ char kqn_init; /* what's been initialised */ char kqn_shuttingdown; /* I'm trying to shut down */ - atomic_t kqn_nthreads; /* # threads not terminated */ - atomic_t kqn_nthreads_running;/* # threads still running */ - - int kqn_optimized_gets; /* optimized GETs? */ - int kqn_copy_small_fwd; /* fwd small msgs from pre-allocated buffer? 
*/ + atomic_t kqn_nthreads; /* # threads running */ -#if CONFIG_SYSCTL - struct ctl_table_header *kqn_sysctl; /* sysctl interface */ -#endif kqswnal_rx_t *kqn_rxds; /* all the receive descriptors */ kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */ @@ -214,6 +221,7 @@ typedef struct spinlock_t kqn_idletxd_lock; /* serialise idle txd access */ wait_queue_head_t kqn_idletxd_waitq; /* sender blocks here waiting for idle txd */ struct list_head kqn_idletxd_fwdq; /* forwarded packets block here waiting for idle txd */ + atomic_t kqn_pending_txs; /* # transmits being prepped */ spinlock_t kqn_sched_lock; /* serialise packet schedulers */ wait_queue_head_t kqn_sched_waitq; /* scheduler blocks here */ @@ -247,12 +255,13 @@ typedef struct /* kqn_init state */ #define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ #define KQN_INIT_DATA 1 -#define KQN_INIT_PTL 2 +#define KQN_INIT_LIB 2 #define KQN_INIT_ALL 3 -extern nal_cb_t kqswnal_lib; -extern nal_t kqswnal_api; -extern kqswnal_data_t kqswnal_data; +extern nal_cb_t kqswnal_lib; +extern nal_t kqswnal_api; +extern kqswnal_tunables_t kqswnal_tunables; +extern kqswnal_data_t kqswnal_data; /* global pre-prepared replies to keep off the stack */ extern EP_STATUSBLK kqswnal_rpc_success; diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c index 61c88f6..2bcb853 100644 --- a/lnet/klnds/qswlnd/qswlnd_cb.c +++ b/lnet/klnds/qswlnd/qswlnd_cb.c @@ -426,7 +426,8 @@ kqswnal_put_idle_tx (kqswnal_tx_t *ktx) list_add (&ktx->ktx_list, &kqswnal_data.kqn_idletxds); /* anything blocking for a tx descriptor? */ - if (!list_empty(&kqswnal_data.kqn_idletxd_fwdq)) /* forwarded packet? */ + if (!kqswnal_data.kqn_shuttingdown && + !list_empty(&kqswnal_data.kqn_idletxd_fwdq)) /* forwarded packet? 
*/ { CDEBUG(D_NET,"wakeup fwd\n"); @@ -460,6 +461,9 @@ kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block) for (;;) { spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); + if (kqswnal_data.kqn_shuttingdown) + break; + /* "normal" descriptor is free */ if (!list_empty (&kqswnal_data.kqn_idletxds)) { ktx = list_entry (kqswnal_data.kqn_idletxds.next, @@ -467,14 +471,8 @@ kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block) break; } - /* "normal" descriptor pool is empty */ - - if (fwd != NULL) { /* forwarded packet => queue for idle txd */ - CDEBUG (D_NET, "blocked fwd [%p]\n", fwd); - list_add_tail (&fwd->kprfd_list, - &kqswnal_data.kqn_idletxd_fwdq); + if (fwd != NULL) /* forwarded packet? */ break; - } /* doing a local transmit */ if (!may_block) { @@ -494,13 +492,20 @@ kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block) CDEBUG (D_NET, "blocking for tx desc\n"); wait_event (kqswnal_data.kqn_idletxd_waitq, - !list_empty (&kqswnal_data.kqn_idletxds)); + !list_empty (&kqswnal_data.kqn_idletxds) || + kqswnal_data.kqn_shuttingdown); } if (ktx != NULL) { list_del (&ktx->ktx_list); list_add (&ktx->ktx_list, &kqswnal_data.kqn_activetxds); ktx->ktx_launcher = current->pid; + atomic_inc(&kqswnal_data.kqn_pending_txs); + } else if (fwd != NULL) { + /* queue forwarded packet until idle txd available */ + CDEBUG (D_NET, "blocked fwd [%p]\n", fwd); + list_add_tail (&fwd->kprfd_list, + &kqswnal_data.kqn_idletxd_fwdq); } spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); @@ -601,6 +606,9 @@ kqswnal_launch (kqswnal_tx_t *ktx) ktx->ktx_launchtime = jiffies; + if (kqswnal_data.kqn_shuttingdown) + return (-ESHUTDOWN); + LASSERT (dest >= 0); /* must be a peer */ if (ktx->ktx_state == KTX_GETTING) { /* NB ktx_frag[0] is the GET hdr + kqswnal_remotemd_t. 
The @@ -635,8 +643,6 @@ kqswnal_launch (kqswnal_tx_t *ktx) return (0); case EP_ENOMEM: /* can't allocate ep txd => queue for later */ - LASSERT (in_interrupt()); - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); list_add_tail (&ktx->ktx_delayed_list, &kqswnal_data.kqn_delayedtxds); @@ -921,7 +927,7 @@ kqswnal_sendmsg (nal_cb_t *nal, LASSERT (payload_kiov == NULL || !in_interrupt ()); /* payload is either all vaddrs or all pages */ LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - + if (payload_nob > KQSW_MAXPAYLOAD) { CERROR ("request exceeds MTU size "LPSZ" (max %u).\n", payload_nob, KQSW_MAXPAYLOAD); @@ -967,19 +973,17 @@ kqswnal_sendmsg (nal_cb_t *nal, "nid "LPX64" via "LPX64" elanID %d\n", nid, targetnid, ep_rxd_node(((kqswnal_rx_t *)private)->krx_rxd)); - return (PTL_FAIL); + rc = -EINVAL; + goto out; } /* peer expects RPC completion with GET data */ rc = kqswnal_dma_reply (ktx, payload_niov, payload_iov, payload_kiov, payload_offset, payload_nob); - if (rc == 0) - return (PTL_OK); - - CERROR ("Can't DMA reply to "LPX64": %d\n", nid, rc); - kqswnal_put_idle_tx (ktx); - return (PTL_FAIL); + if (rc != 0) + CERROR ("Can't DMA reply to "LPX64": %d\n", nid, rc); + goto out; } memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */ @@ -1023,7 +1027,7 @@ kqswnal_sendmsg (nal_cb_t *nal, memcpy(ktx->ktx_buffer + sizeof(*hdr) + sizeof(csum), &csum, sizeof(csum)); #endif - if (kqswnal_data.kqn_optimized_gets && + if (kqswnal_tunables.kqn_optimized_gets && type == PTL_MSG_GET && /* doing a GET */ nid == targetnid) { /* not forwarding */ lib_md_t *md = libmsg->md; @@ -1052,11 +1056,8 @@ kqswnal_sendmsg (nal_cb_t *nal, else rc = kqswnal_map_tx_iov (ktx, 0, md->length, md->md_niov, md->md_iov.iov); - - if (rc < 0) { - kqswnal_put_idle_tx (ktx); - return (PTL_FAIL); - } + if (rc != 0) + goto out; rmd->kqrmd_nfrag = ktx->ktx_nfrag - 1; @@ -1119,25 +1120,26 @@ kqswnal_sendmsg (nal_cb_t *nal, else rc = kqswnal_map_tx_iov (ktx, 
payload_offset, payload_nob, payload_niov, payload_iov); - if (rc != 0) { - kqswnal_put_idle_tx (ktx); - return (PTL_FAIL); - } + if (rc != 0) + goto out; } ktx->ktx_port = (payload_nob <= KQSW_SMALLPAYLOAD) ? EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE; rc = kqswnal_launch (ktx); - if (rc != 0) { /* failed? */ - CERROR ("Failed to send packet to "LPX64": %d\n", targetnid, rc); + + out: + CDEBUG(rc == 0 ? D_NET : D_ERROR, + "%s "LPSZ" bytes to "LPX64" via "LPX64": rc %d\n", + rc == 0 ? "Sent" : "Failed to send", + payload_nob, nid, targetnid, rc); + + if (rc != 0) kqswnal_put_idle_tx (ktx); - return (PTL_FAIL); - } - CDEBUG(D_NET, "sent "LPSZ" bytes to "LPX64" via "LPX64"\n", - payload_nob, nid, targetnid); - return (PTL_OK); + atomic_dec(&kqswnal_data.kqn_pending_txs); + return (rc == 0 ? PTL_OK : PTL_FAIL); } static ptl_err_t @@ -1204,7 +1206,7 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) if (kqswnal_nid2elanid (nid) < 0) { CERROR("Can't forward [%p] to "LPX64": not a peer\n", fwd, nid); rc = -EHOSTUNREACH; - goto failed; + goto out; } /* copy hdr into pre-mapped buffer */ @@ -1244,20 +1246,20 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) #endif rc = kqswnal_map_tx_kiov (ktx, 0, nob, niov, kiov); if (rc != 0) - goto failed; + goto out; } rc = kqswnal_launch (ktx); - if (rc == 0) - return; + out: + if (rc != 0) { + CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, nid, rc); - failed: - LASSERT (rc != 0); - CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, nid, rc); + kqswnal_put_idle_tx (ktx); + /* complete now (with failure) */ + kpr_fwd_done (&kqswnal_data.kqn_router, fwd, rc); + } - kqswnal_put_idle_tx (ktx); - /* complete now (with failure) */ - kpr_fwd_done (&kqswnal_data.kqn_router, fwd, rc); + atomic_dec(&kqswnal_data.kqn_pending_txs); } void @@ -1727,7 +1729,6 @@ kqswnal_thread_start (int (*fn)(void *arg), void *arg) return ((int)pid); atomic_inc (&kqswnal_data.kqn_nthreads); - atomic_inc 
(&kqswnal_data.kqn_nthreads_running); return (0); } @@ -1746,7 +1747,6 @@ kqswnal_scheduler (void *arg) long flags; int rc; int counter = 0; - int shuttingdown = 0; int did_something; kportal_daemonize ("kqswnal_sched"); @@ -1756,18 +1756,6 @@ kqswnal_scheduler (void *arg) for (;;) { - if (kqswnal_data.kqn_shuttingdown != shuttingdown) { - - if (kqswnal_data.kqn_shuttingdown == 2) - break; - - /* During stage 1 of shutdown we are still responsive - * to receives */ - - atomic_dec (&kqswnal_data.kqn_nthreads_running); - shuttingdown = kqswnal_data.kqn_shuttingdown; - } - did_something = 0; if (!list_empty (&kqswnal_data.kqn_readyrxds)) @@ -1784,8 +1772,7 @@ kqswnal_scheduler (void *arg) spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags); } - if (!shuttingdown && - !list_empty (&kqswnal_data.kqn_delayedtxds)) + if (!list_empty (&kqswnal_data.kqn_delayedtxds)) { ktx = list_entry(kqswnal_data.kqn_delayedtxds.next, kqswnal_tx_t, ktx_list); @@ -1794,31 +1781,31 @@ kqswnal_scheduler (void *arg) flags); rc = kqswnal_launch (ktx); - if (rc != 0) /* failed: ktx_nid down? 
*/ - { + if (rc != 0) { CERROR("Failed delayed transmit to "LPX64 ": %d\n", ktx->ktx_nid, rc); kqswnal_tx_done (ktx, rc); } + atomic_dec (&kqswnal_data.kqn_pending_txs); did_something = 1; spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); } - if (!shuttingdown & - !list_empty (&kqswnal_data.kqn_delayedfwds)) + if (!list_empty (&kqswnal_data.kqn_delayedfwds)) { fwd = list_entry (kqswnal_data.kqn_delayedfwds.next, kpr_fwd_desc_t, kprfd_list); list_del (&fwd->kprfd_list); spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); + /* If we're shutting down, this will just requeue fwd on kqn_idletxd_fwdq */ kqswnal_fwd_packet (NULL, fwd); did_something = 1; spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); } - /* nothing to do or hogging CPU */ + /* nothing to do or hogging CPU */ if (!did_something || counter++ == KQSW_RESCHED) { spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, flags); @@ -1826,21 +1813,24 @@ kqswnal_scheduler (void *arg) counter = 0; if (!did_something) { + if (kqswnal_data.kqn_shuttingdown == 2) { + /* We only exit in stage 2 of shutdown when + * there's nothing left to do */ + break; + } rc = wait_event_interruptible (kqswnal_data.kqn_sched_waitq, - kqswnal_data.kqn_shuttingdown != shuttingdown || + kqswnal_data.kqn_shuttingdown == 2 || !list_empty(&kqswnal_data.kqn_readyrxds) || !list_empty(&kqswnal_data.kqn_delayedtxds) || !list_empty(&kqswnal_data.kqn_delayedfwds)); LASSERT (rc == 0); - } else if (current->need_resched) + } else if (need_resched()) schedule (); spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); } } - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); - kqswnal_thread_fini (); return (0); } diff --git a/lnet/klnds/scimaclnd/.cvsignore b/lnet/klnds/scimaclnd/.cvsignore index e995588..48b17e9 100644 --- a/lnet/klnds/scimaclnd/.cvsignore +++ b/lnet/klnds/scimaclnd/.cvsignore @@ -1,3 +1,10 @@ .deps Makefile -Makefile.in +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.*.cmd 
+.tmp_versions +.depend diff --git a/lnet/klnds/scimaclnd/Makefile.am b/lnet/klnds/scimaclnd/Makefile.am deleted file mode 100644 index 6da31f0..0000000 --- a/lnet/klnds/scimaclnd/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = kscimacnal -modulenet_DATA = kscimacnal.o -EXTRA_PROGRAMS = kscimacnal - -DEFS = -kscimacnal_SOURCES = scimacnal.c scimacnal_cb.c scimacnal.h diff --git a/lnet/klnds/scimaclnd/Makefile.in b/lnet/klnds/scimaclnd/Makefile.in new file mode 100644 index 0000000..c7eb4ac --- /dev/null +++ b/lnet/klnds/scimaclnd/Makefile.in @@ -0,0 +1,6 @@ +MODULES := kscimacnal +kscimacnal-objs := scimacnal.o scimacnal_cb.o + +EXTRA_PRE_CFLAGS += @SCIMACCPPFLAGS@ + +@INCLUDE_RULES@ diff --git a/lnet/klnds/scimaclnd/autoMakefile.am b/lnet/klnds/scimaclnd/autoMakefile.am new file mode 100644 index 0000000..47433cd --- /dev/null +++ b/lnet/klnds/scimaclnd/autoMakefile.am @@ -0,0 +1,13 @@ +# This code is issued under the GNU General Public License. 
+# See the file COPYING in this distribution + +if MODULES +if !CRAY_PORTALS +if BUILD_SCIMACNAL +modulenet_DATA = kscimacnal$(KMODEXT) +endif +endif +endif + +MOSTLYCLEANFILES = *.o *.ko *.mod.c +DIST_SOURCES = $(ksimacnal-objs:%.o=%.c) scimacnal.h diff --git a/lnet/klnds/scimaclnd/scimacnal.c b/lnet/klnds/scimaclnd/scimacnal.c index 35de6eb..e77bd8e 100644 --- a/lnet/klnds/scimaclnd/scimacnal.c +++ b/lnet/klnds/scimaclnd/scimacnal.c @@ -26,7 +26,6 @@ #include "scimacnal.h" -ptl_handle_ni_t kscimacnal_ni; nal_t kscimacnal_api; kscimacnal_data_t kscimacnal_data; @@ -101,10 +100,34 @@ static void kscimacnal_unlock(nal_t *nal, unsigned long *flags) } -static int kscimacnal_shutdown(nal_t *nal, int ni) +static void kscimacnal_shutdown(nal_t *nal, int ni) { LASSERT (nal == &kscimacnal_api); - return 0; + LASSERT (kscimacnal_data.ksci_init); + + if (nal->nal_refct != 0) + return; + + /* Called on last matching PtlNIFini() */ + + /* FIXME: How should the shutdown procedure really look? + */ + kscimacnal_data.ksci_shuttingdown=1; + + /* Stop handling ioctls */ + libcfs_nal_cmd_unregister(SCIMACNAL); + + mac_finish(kscimacnal_data.ksci_machandle); + + /* finalise lib after net shuts up */ + lib_fini(&kscimacnal_lib); + + kscimacnal_data.ksci_init = 0; + + /* Allow unload */ + PORTAL_MODULE_UNUSE; + + return; } @@ -123,56 +146,26 @@ static void kscimacnal_yield( nal_t *nal, unsigned long *flags, int milliseconds } -static nal_t *kscimacnal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - int nnids = 512; /* FIXME: Need ScaMac funktion to get #nodes */ - - CDEBUG(D_NET, "calling lib_init with nid "LPX64" nnids %d\n", kscimacnal_data.ksci_nid, nnids); - lib_init(&kscimacnal_lib, kscimacnal_data.ksci_nid, 0, nnids,ptl_size, ac_size); - return &kscimacnal_api; -} - - -/* Called by kernel at module unload time */ -static void /*__exit*/ -kscimacnal_finalize(void) -{ - /* FIXME: How should the shutdown procedure really look? 
*/ - kscimacnal_data.ksci_shuttingdown=1; - - PORTAL_SYMBOL_UNREGISTER(kscimacnal_ni); - - PtlNIFini(kscimacnal_ni); - lib_fini(&kscimacnal_lib); - - mac_finish(kscimacnal_data.ksci_machandle); - - CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory)); - - return; -} - - -/* Called by kernel at module insertion time */ -static int __init -kscimacnal_initialize(void) +static int kscimacnal_startup(nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { int rc; - unsigned long nid=0; + mac_physaddr_t mac_physaddr; + ptl_process_id_t process_id; mac_handle_t *machandle = NULL; + if (nal->nal_refct != 0) { + if (actual_limits != NULL) + *actual_limits = kscimacnal_lib.ni.actual_limits; + return (PTL_OK); + } - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory)); - - kscimacnal_api.forward = kscimacnal_forward; - kscimacnal_api.shutdown = kscimacnal_shutdown; - kscimacnal_api.yield = kscimacnal_yield; - kscimacnal_api.validate = NULL; /* our api validate is a NOOP */ - kscimacnal_api.lock= kscimacnal_lock; - kscimacnal_api.unlock= kscimacnal_unlock; - kscimacnal_api.nal_data = &kscimacnal_data; + /* Called on first PtlNIInit(SCIMACNAL) */ + LASSERT (nal == kscimacnal_api); + LASSERT (!kscimacnal_data.ksci_init); + kscimacnal_lib.nal_data = &kscimacnal_data; memset(&kscimacnal_data, 0, sizeof(kscimacnal_data)); @@ -188,7 +181,7 @@ kscimacnal_initialize(void) if(!machandle) { CERROR("mac_init() failed\n"); - return -1; + return PTL_FAIL; } kscimacnal_data.ksci_machandle = machandle; @@ -199,45 +192,88 @@ kscimacnal_initialize(void) mac_get_mtusize(machandle), SCIMACNAL_MTU); CERROR("Consult README.scimacnal for more information\n"); mac_finish(machandle); - return -1; + return PTL_FAIL; } /* Get the node ID */ /* mac_get_physaddrlen() is a function instead of define, sigh */ - LASSERT(mac_get_physaddrlen(machandle) <= sizeof(nid)); - if(mac_get_physaddr(machandle, (mac_physaddr_t *) &nid)) 
{ + LASSERT(mac_get_physaddrlen(machandle) <= sizeof(mac_physaddr)); + if(mac_get_physaddr(machandle, &mac_physaddr)) { CERROR("mac_get_physaddr() failed\n"); mac_finish(machandle); - return -1; + return PTL_FAIL; } - nid = ntohl(nid); - kscimacnal_data.ksci_nid = nid; + kscimacnal_data.ksci_nid = (ptl_nid_t)(ntohl(mac_physaddr)); + process_id.pid = 0; + process_id.nid = kscimacnal_data.ksci_nid; - /* Initialize Network Interface */ - /* FIXME: What do the magic numbers mean? Documentation anyone? */ - rc = PtlNIInit(kscimacnal_init, 32, 4, 0, &kscimacnal_ni); - if (rc) { + CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", + kscimacnal_data.ksci_nid); + + rc = lib_init(&kscimacnal_lib, process_id, + requested_limits, actual_limits); + if (rc != PTL_OK) { CERROR("PtlNIInit failed %d\n", rc); mac_finish(machandle); - return (-ENOMEM); + return (rc); } /* Init command interface */ - rc = kportal_nal_register (SCIMACNAL, &kscimacnal_cmd, NULL); + rc = libcfs_nal_cmd_register (SCIMACNAL, &kscimacnal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); - PtlNIFini(kscimacnal_ni); + lib_fini(&kscimacnal_lib); mac_finish(machandle); - return (rc); + return (PTL_FAIL); } - - PORTAL_SYMBOL_REGISTER(kscimacnal_ni); - /* We're done now, it's OK for the RX callback to do stuff */ kscimacnal_data.ksci_init = 1; + /* Prevent unload before matching PtlNIFini() */ + PORTAL_MODULE_USE; + + return (PTL_OK); +} + + +/* Called by kernel at module unload time */ +static void /*__exit*/ +kscimacnal_finalize(void) +{ + LASSERT (!kscimacnal_data.ksci_init); + + ptl_unregister_nal(SCIMACNAL); + + CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory)); + + return; +} + + +/* Called by kernel at module insertion time */ +static int __init +kscimacnal_initialize(void) +{ + int rc; + + CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory)); + + kscimacnal_api.startup = kscimacnal_startup; + kscimacnal_api.forward = 
kscimacnal_forward; + kscimacnal_api.shutdown = kscimacnal_shutdown; + kscimacnal_api.yield = kscimacnal_yield; + kscimacnal_api.lock= kscimacnal_lock; + kscimacnal_api.unlock= kscimacnal_unlock; + kscimacnal_api.nal_data = &kscimacnal_data; + + rc = ptl_register_nal(SCIMACNAL, &kscimacnal_api); + if (rc != PTL_OK) { + CERROR("Can't register SCIMACNAL: %d\n", rc); + return (-ENODEV); + } + return 0; } diff --git a/lnet/klnds/socklnd/.cvsignore b/lnet/klnds/socklnd/.cvsignore index 95973d6..5ed596b 100644 --- a/lnet/klnds/socklnd/.cvsignore +++ b/lnet/klnds/socklnd/.cvsignore @@ -1,4 +1,10 @@ .deps Makefile -Makefile.in -.*.o.cmd +.*.cmd +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.tmp_versions +.depend diff --git a/lnet/klnds/socklnd/Makefile.am b/lnet/klnds/socklnd/Makefile.am deleted file mode 100644 index acdba5e..0000000 --- a/lnet/klnds/socklnd/Makefile.am +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = ksocknal -modulenet_DATA = ksocknal.o -EXTRA_PROGRAMS = ksocknal - -# If you don't build with -O2, your modules won't insert, becahse htonl is -# just special that way. -AM_CFLAGS = -O2 - -DEFS = -ksocknal_SOURCES = socknal.c socknal_cb.c socknal.h diff --git a/lnet/klnds/socklnd/Makefile.in b/lnet/klnds/socklnd/Makefile.in new file mode 100644 index 0000000..633b455 --- /dev/null +++ b/lnet/klnds/socklnd/Makefile.in @@ -0,0 +1,8 @@ +MODULES := ksocknal +ksocknal-objs := socknal.o socknal_cb.o + +# If you don't build with -O2, your modules won't insert, because htonl is +# just special that way. 
+EXTRA_POST_CFLAGS := -O2 + +@INCLUDE_RULES@ diff --git a/lnet/klnds/socklnd/autoMakefile.am b/lnet/klnds/socklnd/autoMakefile.am new file mode 100644 index 0000000..070b649 --- /dev/null +++ b/lnet/klnds/socklnd/autoMakefile.am @@ -0,0 +1,13 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +if MODULES +if !CRAY_PORTALS +modulenet_DATA = ksocknal$(KMODEXT) +endif +endif + +MOSTLYCLEANFILES = *.o *.ko *.mod.c +DIST_SOURCES = $(ksocknal-objs:%.o=%.c) socknal.h diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index d874a6c..32bbbec 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -25,13 +25,10 @@ #include "socknal.h" +nal_t ksocknal_api; +ksock_nal_data_t ksocknal_data; ptl_handle_ni_t ksocknal_ni; -static nal_t ksocknal_api; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -ksock_nal_data_t ksocknal_data; -#else -static ksock_nal_data_t ksocknal_data; -#endif +ksock_tunables_t ksocknal_tunables; kpr_nal_interface_t ksocknal_router_interface = { kprni_nalid: SOCKNAL, @@ -40,6 +37,7 @@ kpr_nal_interface_t ksocknal_router_interface = { kprni_notify: ksocknal_notify, }; +#ifdef CONFIG_SYSCTL #define SOCKNAL_SYSCTL 200 #define SOCKNAL_SYSCTL_TIMEOUT 1 @@ -50,21 +48,21 @@ kpr_nal_interface_t ksocknal_router_interface = { static ctl_table ksocknal_ctl_table[] = { {SOCKNAL_SYSCTL_TIMEOUT, "timeout", - &ksocknal_data.ksnd_io_timeout, sizeof (int), + &ksocknal_tunables.ksnd_io_timeout, sizeof (int), 0644, NULL, &proc_dointvec}, {SOCKNAL_SYSCTL_EAGER_ACK, "eager_ack", - &ksocknal_data.ksnd_eager_ack, sizeof (int), + &ksocknal_tunables.ksnd_eager_ack, sizeof (int), 0644, NULL, &proc_dointvec}, #if SOCKNAL_ZC {SOCKNAL_SYSCTL_ZERO_COPY, "zero_copy", - &ksocknal_data.ksnd_zc_min_frag, sizeof (int), + &ksocknal_tunables.ksnd_zc_min_frag, sizeof (int), 0644, NULL, &proc_dointvec}, #endif {SOCKNAL_SYSCTL_TYPED, 
"typed", - &ksocknal_data.ksnd_typed_conns, sizeof (int), + &ksocknal_tunables.ksnd_typed_conns, sizeof (int), 0644, NULL, &proc_dointvec}, {SOCKNAL_SYSCTL_MIN_BULK, "min_bulk", - &ksocknal_data.ksnd_min_bulk, sizeof (int), + &ksocknal_tunables.ksnd_min_bulk, sizeof (int), 0644, NULL, &proc_dointvec}, { 0 } }; @@ -73,6 +71,7 @@ static ctl_table ksocknal_top_ctl_table[] = { {SOCKNAL_SYSCTL, "socknal", NULL, 0, 0555, ksocknal_ctl_table}, { 0 } }; +#endif int ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len, @@ -88,12 +87,6 @@ ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len, return PTL_OK; } -int -ksocknal_api_shutdown(nal_t *nal, int ni) -{ - return PTL_OK; -} - void ksocknal_api_lock(nal_t *nal, unsigned long *flags) { @@ -154,19 +147,6 @@ ksocknal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds) return (milliseconds); } -nal_t * -ksocknal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", (ptl_nid_t)0); - lib_init(&ksocknal_lib, (ptl_nid_t)0, 0, 10, ptl_size, ac_size); - return (&ksocknal_api); -} - -/* - * EXTRA functions follow - */ - int ksocknal_set_mynid(ptl_nid_t nid) { @@ -832,7 +812,7 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, /* Set the deadline for the outgoing HELLO to drain */ conn->ksnc_tx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; list_add (&conn->ksnc_list, &peer->ksnp_conns); atomic_inc (&conn->ksnc_refcount); @@ -1466,30 +1446,34 @@ ksocknal_free_buffers (void) } void -ksocknal_module_fini (void) +ksocknal_api_shutdown (nal_t *nal) { int i; + if (nal->nal_refct != 0) { + /* This module got the first ref */ + PORTAL_MODULE_UNUSE; + return; + } + CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", atomic_read (&portal_kmemory)); + LASSERT(nal == &ksocknal_api); + switch (ksocknal_data.ksnd_init) { default: LASSERT 
(0); case SOCKNAL_INIT_ALL: -#if CONFIG_SYSCTL - if (ksocknal_data.ksnd_sysctl != NULL) - unregister_sysctl_table (ksocknal_data.ksnd_sysctl); -#endif - kportal_nal_unregister(SOCKNAL); - PORTAL_SYMBOL_UNREGISTER (ksocknal_ni); + libcfs_nal_cmd_unregister(SOCKNAL); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; /* fall through */ - case SOCKNAL_INIT_PTL: + case SOCKNAL_INIT_LIB: /* No more calls to ksocknal_cmd() to create new * autoroutes/connections since we're being unloaded. */ - PtlNIFini(ksocknal_ni); /* Delete all autoroute entries */ ksocknal_del_route(PTL_NID_ANY, 0, 0, 0); @@ -1510,6 +1494,8 @@ ksocknal_module_fini (void) /* Tell lib we've stopped calling into her. */ lib_fini(&ksocknal_lib); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; /* fall through */ case SOCKNAL_INIT_DATA: @@ -1557,6 +1543,8 @@ ksocknal_module_fini (void) kpr_deregister (&ksocknal_data.ksnd_router); ksocknal_free_buffers(); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; /* fall through */ case SOCKNAL_INIT_NOTHING: @@ -1571,7 +1559,7 @@ ksocknal_module_fini (void) } -void __init +void ksocknal_init_incarnation (void) { struct timeval tv; @@ -1587,42 +1575,31 @@ ksocknal_init_incarnation (void) (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; } -int __init -ksocknal_module_init (void) +int +ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { - int pkmem = atomic_read(&portal_kmemory); - int rc; - int i; - int j; + ptl_process_id_t process_id; + int pkmem = atomic_read(&portal_kmemory); + int rc; + int i; + int j; - /* packet descriptor must fit in a router descriptor's scratchpad */ - LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); - /* the following must be sizeof(int) for proc_dointvec() */ - LASSERT(sizeof (ksocknal_data.ksnd_io_timeout) == sizeof (int)); - LASSERT(sizeof (ksocknal_data.ksnd_eager_ack) == sizeof (int)); - /* check ksnr_connected/connecting field large enough */ - 
LASSERT(SOCKNAL_CONN_NTYPES <= 4); - - LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); + LASSERT (nal == &ksocknal_api); - ksocknal_api.forward = ksocknal_api_forward; - ksocknal_api.shutdown = ksocknal_api_shutdown; - ksocknal_api.validate = NULL; /* our api validate is a NOOP */ - ksocknal_api.lock = ksocknal_api_lock; - ksocknal_api.unlock = ksocknal_api_unlock; - ksocknal_api.nal_data = &ksocknal_data; + if (nal->nal_refct != 0) { + if (actual_limits != NULL) + *actual_limits = ksocknal_lib.ni.actual_limits; + /* This module got the first ref */ + PORTAL_MODULE_USE; + return (PTL_OK); + } - ksocknal_lib.nal_data = &ksocknal_data; + LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ - ksocknal_data.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; - ksocknal_data.ksnd_eager_ack = SOCKNAL_EAGER_ACK; - ksocknal_data.ksnd_typed_conns = SOCKNAL_TYPED_CONNS; - ksocknal_data.ksnd_min_bulk = SOCKNAL_MIN_BULK; -#if SOCKNAL_ZC - ksocknal_data.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; -#endif ksocknal_init_incarnation(); ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; @@ -1669,7 +1646,7 @@ ksocknal_module_init (void) PORTAL_ALLOC(ksocknal_data.ksnd_schedulers, sizeof(ksock_sched_t) * SOCKNAL_N_SCHED); if (ksocknal_data.ksnd_schedulers == NULL) { - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (-ENOMEM); } @@ -1685,15 +1662,19 @@ ksocknal_module_init (void) init_waitqueue_head (&kss->kss_waitq); } - rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni); - if (rc != 0) { - CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); - ksocknal_module_fini (); + /* NB we have to wait to be told our true NID... 
*/ + process_id.pid = 0; + process_id.nid = 0; + + rc = lib_init(&ksocknal_lib, process_id, + requested_limits, actual_limits); + if (rc != PTL_OK) { + CERROR("lib_init failed: error %d\n", rc); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } - PtlNIDebug(ksocknal_ni, ~0); - ksocknal_data.ksnd_init = SOCKNAL_INIT_PTL; // flag PtlNIInit() called + ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; // flag lib_init() called for (i = 0; i < SOCKNAL_N_SCHED; i++) { rc = ksocknal_thread_start (ksocknal_scheduler, @@ -1701,7 +1682,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } } @@ -1710,7 +1691,7 @@ ksocknal_module_init (void) rc = ksocknal_thread_start (ksocknal_autoconnectd, (void *)((long)i)); if (rc != 0) { CERROR("Can't spawn socknal autoconnectd: %d\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } } @@ -1718,7 +1699,7 @@ ksocknal_module_init (void) rc = ksocknal_thread_start (ksocknal_reaper, NULL); if (rc != 0) { CERROR ("Can't spawn socknal reaper: %d\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } @@ -1728,7 +1709,7 @@ ksocknal_module_init (void) CDEBUG(D_NET, "Can't initialise routing interface " "(rc = %d): not routing\n", rc); } else { - /* Only allocate forwarding buffers if I'm on a gateway */ + /* Only allocate forwarding buffers if there's a router */ for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++) { @@ -1744,7 +1725,7 @@ ksocknal_module_init (void) PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t, fmb_kiov[pool->fmp_buff_pages])); if (fmb == NULL) { - ksocknal_module_fini(); + ksocknal_api_shutdown(&ksocknal_api); return (-ENOMEM); } @@ -1754,7 +1735,7 @@ ksocknal_module_init (void) fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL); if (fmb->fmb_kiov[j].kiov_page == NULL) { - ksocknal_module_fini (); + 
ksocknal_api_shutdown (&ksocknal_api); return (-ENOMEM); } @@ -1765,19 +1746,13 @@ ksocknal_module_init (void) } } - rc = kportal_nal_register(SOCKNAL, &ksocknal_cmd, NULL); + rc = libcfs_nal_cmd_register(SOCKNAL, &ksocknal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } - PORTAL_SYMBOL_REGISTER(ksocknal_ni); - -#ifdef CONFIG_SYSCTL - /* Press on regardless even if registering sysctl doesn't work */ - ksocknal_data.ksnd_sysctl = register_sysctl_table (ksocknal_top_ctl_table, 0); -#endif /* flag everything initialised */ ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; @@ -1789,6 +1764,75 @@ ksocknal_module_init (void) return (0); } +void __exit +ksocknal_module_fini (void) +{ +#ifdef CONFIG_SYSCTL + if (ksocknal_tunables.ksnd_sysctl != NULL) + unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl); +#endif + PtlNIFini(ksocknal_ni); + + ptl_unregister_nal(SOCKNAL); +} + +int __init +ksocknal_module_init (void) +{ + int rc; + + /* packet descriptor must fit in a router descriptor's scratchpad */ + LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); + /* the following must be sizeof(int) for proc_dointvec() */ + LASSERT(sizeof (ksocknal_tunables.ksnd_io_timeout) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_eager_ack) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_typed_conns) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_min_bulk) == sizeof (int)); +#if SOCKNAL_ZC + LASSERT(sizeof (ksocknal_tunables.ksnd_zc_min_frag) == sizeof (int)); +#endif + /* check ksnr_connected/connecting field large enough */ + LASSERT(SOCKNAL_CONN_NTYPES <= 4); + + ksocknal_api.startup = ksocknal_api_startup; + ksocknal_api.forward = ksocknal_api_forward; + ksocknal_api.shutdown = ksocknal_api_shutdown; + ksocknal_api.lock = ksocknal_api_lock; + ksocknal_api.unlock = ksocknal_api_unlock; + ksocknal_api.nal_data = &ksocknal_data; 
+ + ksocknal_lib.nal_data = &ksocknal_data; + + /* Initialise dynamic tunables to defaults once only */ + ksocknal_tunables.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; + ksocknal_tunables.ksnd_eager_ack = SOCKNAL_EAGER_ACK; + ksocknal_tunables.ksnd_typed_conns = SOCKNAL_TYPED_CONNS; + ksocknal_tunables.ksnd_min_bulk = SOCKNAL_MIN_BULK; +#if SOCKNAL_ZC + ksocknal_tunables.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; +#endif + + rc = ptl_register_nal(SOCKNAL, &ksocknal_api); + if (rc != PTL_OK) { + CERROR("Can't register SOCKNAL: %d\n", rc); + return (-ENOMEM); /* or something... */ + } + + /* Pure gateways want the NAL started up at module load time... */ + rc = PtlNIInit(SOCKNAL, 0, NULL, NULL, &ksocknal_ni); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { + ptl_unregister_nal(SOCKNAL); + return (-ENODEV); + } + +#ifdef CONFIG_SYSCTL + /* Press on regardless even if registering sysctl doesn't work */ + ksocknal_tunables.ksnd_sysctl = + register_sysctl_table (ksocknal_top_ctl_table, 0); +#endif + return (0); +} + MODULE_AUTHOR("Cluster File Systems, Inc. "); MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01"); MODULE_LICENSE("GPL"); @@ -1796,4 +1840,3 @@ MODULE_LICENSE("GPL"); module_init(ksocknal_module_init); module_exit(ksocknal_module_fini); -EXPORT_SYMBOL (ksocknal_ni); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 50ff5ce..87b23dc 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -64,6 +64,7 @@ #include #include #include +#include #include #if CONFIG_SMP @@ -141,7 +142,6 @@ typedef struct { } ksock_irqinfo_t; typedef struct { - int ksnd_init; /* initialisation state */ int ksnd_io_timeout; /* "stuck" socket timeout (seconds) */ int ksnd_eager_ack; /* make TCP ack eagerly? */ int ksnd_typed_conns; /* drive sockets by type? 
*/ @@ -150,6 +150,10 @@ typedef struct { unsigned int ksnd_zc_min_frag; /* minimum zero copy frag size */ #endif struct ctl_table_header *ksnd_sysctl; /* sysctl interface */ +} ksock_tunables_t; + +typedef struct { + int ksnd_init; /* initialisation state */ __u64 ksnd_incarnation; /* my epoch */ rwlock_t ksnd_global_lock; /* stabilize peer/conn ops */ @@ -194,7 +198,7 @@ typedef struct { #define SOCKNAL_INIT_NOTHING 0 #define SOCKNAL_INIT_DATA 1 -#define SOCKNAL_INIT_PTL 2 +#define SOCKNAL_INIT_LIB 2 #define SOCKNAL_INIT_ALL 3 /* A packet just assembled for transmission is represented by 1 or more @@ -362,6 +366,7 @@ typedef struct ksock_peer extern nal_cb_t ksocknal_lib; extern ksock_nal_data_t ksocknal_data; +extern ksock_tunables_t ksocknal_tunables; static inline struct list_head * ksocknal_nid2peerlist (ptl_nid_t nid) diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index ebb32da..21e0abe 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -24,6 +24,9 @@ */ #include "socknal.h" +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +# include +#endif /* * LIB functions follow @@ -262,7 +265,7 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) LASSERT (tx->tx_nkiov > 0); #if SOCKNAL_ZC - if (fragsize >= ksocknal_data.ksnd_zc_min_frag && + if (fragsize >= ksocknal_tunables.ksnd_zc_min_frag && (sock->sk->route_caps & NETIF_F_SG) && (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM))) { @@ -381,7 +384,7 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) * is set. Instead, we presume peer death has occurred if * the socket doesn't drain within a timout */ conn->ksnc_tx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; conn->ksnc_peer->ksnp_last_alive = jiffies; } while (tx->tx_resid != 0); @@ -444,7 +447,7 @@ ksocknal_recv_iov (ksock_conn_t *conn) /* received something... 
*/ conn->ksnc_peer->ksnp_last_alive = jiffies; conn->ksnc_rx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; mb(); /* order with setting rx_started */ conn->ksnc_rx_started = 1; @@ -503,7 +506,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn) /* received something... */ conn->ksnc_peer->ksnp_last_alive = jiffies; conn->ksnc_rx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; mb(); /* order with setting rx_started */ conn->ksnc_rx_started = 1; @@ -562,7 +565,7 @@ ksocknal_receive (ksock_conn_t *conn) if (conn->ksnc_rx_nob_wanted == 0) { /* Completed a message segment (header or payload) */ - if ((ksocknal_data.ksnd_eager_ack & conn->ksnc_type) != 0 && + if ((ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) != 0 && (conn->ksnc_rx_state == SOCKNAL_RX_BODY || conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD)) { /* Remind the socket to ack eagerly... */ @@ -723,7 +726,7 @@ ksocknal_launch_autoconnect_locked (ksock_route_t *route) LASSERT ((route->ksnr_connected & KSNR_TYPED_ROUTES) != KSNR_TYPED_ROUTES); LASSERT (!route->ksnr_connecting); - if (ksocknal_data.ksnd_typed_conns) + if (ksocknal_tunables.ksnd_typed_conns) route->ksnr_connecting = KSNR_TYPED_ROUTES & ~route->ksnr_connected; else @@ -797,7 +800,7 @@ ksocknal_find_conn_locked (ksock_tx_t *tx, ksock_peer_t *peer) fnob = nob; } - if (!ksocknal_data.ksnd_typed_conns) + if (!ksocknal_tunables.ksnd_typed_conns) continue; switch (c->ksnc_type) { @@ -808,11 +811,11 @@ ksocknal_find_conn_locked (ksock_tx_t *tx, ksock_peer_t *peer) case SOCKNAL_CONN_BULK_IN: continue; case SOCKNAL_CONN_BULK_OUT: - if (tx->tx_nob < ksocknal_data.ksnd_min_bulk) + if (tx->tx_nob < ksocknal_tunables.ksnd_min_bulk) continue; break; case SOCKNAL_CONN_CONTROL: - if (tx->tx_nob >= ksocknal_data.ksnd_min_bulk) + if (tx->tx_nob >= ksocknal_tunables.ksnd_min_bulk) continue; break; } @@ -856,7 +859,7 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, 
ksock_conn_t *conn) spin_lock_irqsave (&sched->kss_lock, flags); conn->ksnc_tx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; mb(); /* order with list_add_tail */ list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); @@ -2182,7 +2185,7 @@ ksocknal_setup_sock (struct socket *sock) /* Keepalives: If 3/4 of the timeout elapses, start probing every * second until the timeout elapses. */ - option = (ksocknal_data.ksnd_io_timeout * 3) / 4; + option = (ksocknal_tunables.ksnd_io_timeout * 3) / 4; set_fs (KERNEL_DS); rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE, (char *)&option, sizeof (option)); @@ -2202,7 +2205,7 @@ ksocknal_setup_sock (struct socket *sock) return (rc); } - option = ksocknal_data.ksnd_io_timeout / 4; + option = ksocknal_tunables.ksnd_io_timeout / 4; set_fs (KERNEL_DS); rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT, (char *)&option, sizeof (option)); @@ -2259,7 +2262,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type) /* Set the socket timeouts, so our connection attempt completes in * finite time */ - tv.tv_sec = ksocknal_data.ksnd_io_timeout; + tv.tv_sec = ksocknal_tunables.ksnd_io_timeout; tv.tv_usec = 0; set_fs (KERNEL_DS); @@ -2268,7 +2271,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type) set_fs (oldmm); if (rc != 0) { CERROR ("Can't set send timeout %d: %d\n", - ksocknal_data.ksnd_io_timeout, rc); + ksocknal_tunables.ksnd_io_timeout, rc); goto out; } @@ -2278,7 +2281,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type) set_fs (oldmm); if (rc != 0) { CERROR ("Can't set receive timeout %d: %d\n", - ksocknal_data.ksnd_io_timeout, rc); + ksocknal_tunables.ksnd_io_timeout, rc); goto out; } @@ -2652,9 +2655,9 @@ ksocknal_reaper (void *arg) * timeout on any connection within (n+1)/n times the * timeout interval. 
*/ - if (ksocknal_data.ksnd_io_timeout > n * p) + if (ksocknal_tunables.ksnd_io_timeout > n * p) chunk = (chunk * n * p) / - ksocknal_data.ksnd_io_timeout; + ksocknal_tunables.ksnd_io_timeout; if (chunk == 0) chunk = 1; diff --git a/lnet/libcfs/.cvsignore b/lnet/libcfs/.cvsignore index 7fa686f..c6f0aa4 100644 --- a/lnet/libcfs/.cvsignore +++ b/lnet/libcfs/.cvsignore @@ -1,5 +1,11 @@ .deps Makefile -Makefile.in link-stamp -.*.o.cmd +.*.cmd +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.tmp_versions +.depend diff --git a/lnet/libcfs/Makefile.am b/lnet/libcfs/Makefile.am deleted file mode 100644 index 4f0b303..0000000 --- a/lnet/libcfs/Makefile.am +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2001, 2002 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - - -MODULE = libcfs -modulenet_DATA = libcfs.o -EXTRA_PROGRAMS = libcfs - -DISTCLEANFILES = *.orig *.rej - -DEFS = -libcfs_SOURCES = module.c proc.c debug.c lwt.c - -# Don't distribute any patched files. -dist-hook: - list='$(EXT2C)'; for f in $$list; do rm -f $(distdir)/$$f; done - -include ../Rules.linux diff --git a/lnet/libcfs/Makefile.in b/lnet/libcfs/Makefile.in new file mode 100644 index 0000000..6f9c981 --- /dev/null +++ b/lnet/libcfs/Makefile.in @@ -0,0 +1,4 @@ +MODULES = libcfs +libcfs-objs := debug.o lwt.o module.o proc.o + +@INCLUDE_RULES@ diff --git a/lnet/libcfs/autoMakefile.am b/lnet/libcfs/autoMakefile.am new file mode 100644 index 0000000..192c3ef --- /dev/null +++ b/lnet/libcfs/autoMakefile.am @@ -0,0 +1,11 @@ +# Copyright (C) 2001, 2002 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. 
+# See the file COPYING in this distribution + +if MODULES +modulenet_DATA := libcfs$(KMODEXT) +endif + +MOSTLYCLEANFILES = *.o *.ko *.mod.c +DIST_SOURCES = $(libcfs-objs:%.o=%.c) diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index be117b8..2b2ce3b 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -45,6 +45,7 @@ #include #include #include +#include # define DEBUG_SUBSYSTEM S_PORTALS @@ -52,6 +53,10 @@ #include #include +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +#include +#endif + unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL | S_GMNAL | S_IBNAL); EXPORT_SYMBOL(portal_subsystem_debug); @@ -79,7 +84,7 @@ static char *debug_buf = NULL; static unsigned long debug_size = 0; static atomic_t debug_off_a = ATOMIC_INIT(0); static int debug_wrapped; -wait_queue_head_t debug_ctlwq; +static DECLARE_WAIT_QUEUE_HEAD(debug_ctlwq); #define DAEMON_SND_SIZE (64 << 10) /* @@ -266,7 +271,7 @@ int portals_do_debug_dumplog(void *arg) PTR_ERR(file)); GOTO(out, PTR_ERR(file)); } else { - printk(KERN_ALERT "LustreError: dumping log to %s ... 
writing ...\n", + printk(KERN_ALERT "LustreError: dumping log to %s ...\n", debug_file_name); } @@ -439,18 +444,26 @@ void portals_debug_print(void) void portals_debug_dumplog(void) { int rc; + DECLARE_WAITQUEUE(wait, current); ENTRY; - init_waitqueue_head(&debug_ctlwq); + /* we're being careful to ensure that the kernel thread is + * able to set our state to running as it exits before we + * get to schedule() */ + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&debug_ctlwq, &wait); rc = kernel_thread(portals_do_debug_dumplog, NULL, CLONE_VM | CLONE_FS | CLONE_FILES); - if (rc < 0) { + if (rc < 0) printk(KERN_ERR "LustreError: cannot start log dump thread: " "%d\n", rc); - return; - } - sleep_on(&debug_ctlwq); + else + schedule(); + + /* be sure to teardown if kernel_thread() failed */ + remove_wait_queue(&debug_ctlwq, &wait); + set_current_state(TASK_RUNNING); } int portals_debug_daemon_start(char *file, unsigned int size) @@ -592,7 +605,7 @@ int portals_debug_init(unsigned long bufsize) debug_buf = vmalloc(bufsize + DEBUG_OVERFLOW); if (debug_buf == NULL) return -ENOMEM; - memset(debug_buf, 0, debug_size); + memset(debug_buf, 0, bufsize + DEBUG_OVERFLOW); debug_wrapped = 0; //printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n", @@ -686,7 +699,7 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len) rc = -ENOMEM; goto cleanup; } - list_add(&page->list, &my_pages); + list_add(&PAGE_LIST(page), &my_pages); } spin_lock_irqsave(&portals_debug_lock, flags); @@ -711,7 +724,7 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len) unsigned long to_copy; void *addr; - page = list_entry(pos, struct page, list); + page = list_entry(pos, struct page, PAGE_LIST_ENTRY); to_copy = min(total - off, PAGE_SIZE); if (to_copy == 0) { off = 0; @@ -740,7 +753,7 @@ finish_partial: off = 0; list_for_each(pos, &my_pages) { unsigned long to_copy; - page = list_entry(pos, struct page, list); + page = list_entry(pos, struct page, 
PAGE_LIST_ENTRY); to_copy = min(copied - off, PAGE_SIZE); rc = copy_to_user(buf + off, kmap(page), to_copy); @@ -757,8 +770,8 @@ finish_partial: cleanup: list_for_each_safe(pos, n, &my_pages) { - page = list_entry(pos, struct page, list); - list_del(&page->list); + page = list_entry(pos, struct page, PAGE_LIST_ENTRY); + list_del(&PAGE_LIST(page)); __free_page(page); } return rc; @@ -949,23 +962,26 @@ void portals_run_lbug_upcall(char *file, const char *fn, const int line) char *portals_nid2str(int nal, ptl_nid_t nid, char *str) { switch(nal){ -/* XXX this should be a nal method of some sort */ +/* XXX this could be a nal method of some sort, 'cept it's config + * dependent whether (say) socknal NIDs are actually IP addresses... */ #ifndef CRAY_PORTALS case TCPNAL: /* userspace NAL */ case SOCKNAL: - sprintf(str, "%u:%d.%d.%d.%d", (__u32)(nid >> 32), - HIPQUAD(nid)); + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u", + (__u32)(nid >> 32), HIPQUAD(nid)); break; case QSWNAL: case GMNAL: case IBNAL: - case SCIMACNAL: - sprintf(str, "%u:%u", (__u32)(nid >> 32), (__u32)nid); + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u", + (__u32)(nid >> 32), (__u32)nid); break; #endif default: - snprintf(str, PTL_NALFMT_SIZE-1, "(?%llx)", (long long)nid); + snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? 
%llx", + nal, (long long)nid); + break; } return str; } @@ -976,8 +992,6 @@ spinlock_t stack_backtrace_lock = SPIN_LOCK_UNLOCKED; #if defined(__arch_um__) -extern int is_kernel_text_address(unsigned long addr); - char *portals_debug_dumpstack(void) { asm("int $3"); @@ -986,33 +1000,45 @@ char *portals_debug_dumpstack(void) #elif defined(__i386__) -extern int is_kernel_text_address(unsigned long addr); +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) extern int lookup_symbol(unsigned long address, char *buf, int buflen); +const char *kallsyms_lookup(unsigned long addr, + unsigned long *symbolsize, + unsigned long *offset, + char **modname, char *namebuf) +{ + int rc = lookup_symbol(addr, namebuf, 128); + if (rc == -ENOSYS) + return NULL; + return namebuf; +} +#endif char *portals_debug_dumpstack(void) { - unsigned long esp = current->thread.esp; + unsigned long esp = current->thread.esp, addr; unsigned long *stack = (unsigned long *)&esp; + char *buf = stack_backtrace, *pbuf = buf; int size; - unsigned long addr; - char *buf = stack_backtrace; - char *pbuf = buf; - static char buffer[512]; - int rc = 0; /* User space on another CPU? 
*/ - if ((esp ^ (unsigned long)current) & (PAGE_MASK<<1)){ + if ((esp ^ (unsigned long)current) & (PAGE_MASK << 1)){ buf[0] = '\0'; goto out; } size = sprintf(pbuf, " Call Trace: "); pbuf += size; - while (((long) stack & (THREAD_SIZE-1)) != 0) { + while (((long) stack & (THREAD_SIZE - 1)) != 0) { addr = *stack++; - if (is_kernel_text_address(addr)) { - rc = lookup_symbol(addr, buffer, 512); - if (rc == -ENOSYS) { + if (kernel_text_address(addr)) { + const char *sym_name; + char *modname, buffer[128]; + unsigned long junk, offset; + + sym_name = kallsyms_lookup(addr, &junk, &offset, + &modname, buffer); + if (sym_name == NULL) { if (buf + LUSTRE_TRACE_SIZE <= pbuf + 12) break; size = sprintf(pbuf, "[<%08lx>] ", addr); @@ -1022,7 +1048,7 @@ char *portals_debug_dumpstack(void) <= pbuf + strlen(buffer) + 28 + 1) break; size = sprintf(pbuf, "([<%08lx>] %s (0x%p)) ", - addr, buffer, stack-1); + addr, buffer, stack - 1); } pbuf += size; } diff --git a/lnet/libcfs/module.c b/lnet/libcfs/module.c index 9daa8e0..4e63c86 100644 --- a/lnet/libcfs/module.c +++ b/lnet/libcfs/module.c @@ -51,7 +51,13 @@ #define PORTAL_MINOR 240 -extern void (kping_client)(struct portal_ioctl_data *); +struct nal_cmd_handler { + nal_cmd_handler_fn *nch_handler; + void *nch_private; +}; + +static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1]; +static DECLARE_MUTEX(nal_cmd_sem); #ifdef PORTAL_DEBUG void kportal_assertion_failed(char *expr, char *file, const char *func, @@ -239,6 +245,62 @@ static inline void freedata(void *data, int len) PORTAL_FREE(data, len); } +int +libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *private) +{ + int rc = 0; + + CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler); + + if (nal > 0 && nal <= NAL_MAX_NR) { + down(&nal_cmd_sem); + if (nal_cmd[nal].nch_handler != NULL) + rc = -EBUSY; + else { + nal_cmd[nal].nch_handler = handler; + nal_cmd[nal].nch_private = private; + } + up(&nal_cmd_sem); + } + return rc; +} 
+EXPORT_SYMBOL(libcfs_nal_cmd_register); + +void +libcfs_nal_cmd_unregister(int nal) +{ + CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal); + + LASSERT(nal > 0 && nal <= NAL_MAX_NR); + LASSERT(nal_cmd[nal].nch_handler != NULL); + + down(&nal_cmd_sem); + nal_cmd[nal].nch_handler = NULL; + nal_cmd[nal].nch_private = NULL; + up(&nal_cmd_sem); +} +EXPORT_SYMBOL(libcfs_nal_cmd_unregister); + +int +libcfs_nal_cmd(struct portals_cfg *pcfg) +{ + __u32 nal = pcfg->pcfg_nal; + int rc = -EINVAL; + ENTRY; + + down(&nal_cmd_sem); + if (nal > 0 && nal <= NAL_MAX_NR && + nal_cmd[nal].nch_handler != NULL) { + CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, + pcfg->pcfg_command); + rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private); + } + up(&nal_cmd_sem); + + RETURN(rc); +} +EXPORT_SYMBOL(libcfs_nal_cmd); + static DECLARE_RWSEM(ioctl_list_sem); static LIST_HEAD(ioctl_list); @@ -339,14 +401,22 @@ static int libcfs_ioctl(struct inode *inode, struct file *file, err = lwt_control (data->ioc_flags, data->ioc_misc); break; - case IOC_PORTAL_LWT_SNAPSHOT: - err = lwt_snapshot (&data->ioc_nid, - &data->ioc_count, &data->ioc_misc, + case IOC_PORTAL_LWT_SNAPSHOT: { + cycles_t now; + int ncpu; + int total_size; + + err = lwt_snapshot (&now, &ncpu, &total_size, data->ioc_pbuf1, data->ioc_plen1); + data->ioc_nid = now; + data->ioc_count = ncpu; + data->ioc_misc = total_size; + if (err == 0 && copy_to_user((char *)arg, data, sizeof (*data))) err = -EFAULT; break; + } case IOC_PORTAL_LWT_LOOKUP_STRING: err = lwt_lookup_string (&data->ioc_count, data->ioc_pbuf1, @@ -356,6 +426,33 @@ static int libcfs_ioctl(struct inode *inode, struct file *file, err = -EFAULT; break; #endif + case IOC_PORTAL_NAL_CMD: { + struct portals_cfg pcfg; + + if (data->ioc_plen1 != sizeof(pcfg)) { + CERROR("Bad ioc_plen1 %d (wanted %d)\n", + data->ioc_plen1, sizeof(pcfg)); + err = -EINVAL; + break; + } + + if (copy_from_user(&pcfg, (void *)data->ioc_pbuf1, + sizeof(pcfg))) { + err = -EFAULT; + break; + } + 
+ CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal, + pcfg.pcfg_command); + err = libcfs_nal_cmd(&pcfg); + + if (err == 0 && + copy_to_user((char *)data->ioc_pbuf1, &pcfg, + sizeof (pcfg))) + err = -EFAULT; + break; + } + case IOC_PORTAL_MEMHOG: if (!capable (CAP_SYS_ADMIN)) err = -EPERM; diff --git a/lnet/libcfs/proc.c b/lnet/libcfs/proc.c index c1b2aec..c850f69 100644 --- a/lnet/libcfs/proc.c +++ b/lnet/libcfs/proc.c @@ -62,16 +62,18 @@ extern char debug_daemon_file_path[1024]; extern char portals_upcall[1024]; #define PSDEV_PORTALS (0x100) -#define PSDEV_DEBUG 1 /* control debugging */ -#define PSDEV_SUBSYSTEM_DEBUG 2 /* control debugging */ -#define PSDEV_PRINTK 3 /* force all errors to console */ -#define PSDEV_CONSOLE 4 /* allow _any_ messages to console */ -#define PSDEV_DEBUG_PATH 5 /* crashdump log location */ -#define PSDEV_DEBUG_DUMP_PATH 6 /* crashdump tracelog location */ -#define PSDEV_PORTALS_UPCALL 7 /* User mode upcall script */ - -#define PORTALS_PRIMARY_CTLCNT 7 -static struct ctl_table portals_table[PORTALS_PRIMARY_CTLCNT + 1] = { +enum { + PSDEV_DEBUG = 1, /* control debugging */ + PSDEV_SUBSYSTEM_DEBUG, /* control debugging */ + PSDEV_PRINTK, /* force all errors to console */ + PSDEV_CONSOLE, /* allow _any_ messages to console */ + PSDEV_DEBUG_PATH, /* crashdump log location */ + PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */ + PSDEV_PORTALS_UPCALL, /* User mode upcall script */ + PSDEV_PORTALS_MEMUSED, /* bytes currently PORTAL_ALLOCated */ +}; + +static struct ctl_table portals_table[] = { {PSDEV_DEBUG, "debug", &portal_debug, sizeof(int), 0644, NULL, &proc_dointvec}, {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &portal_subsystem_debug, @@ -88,6 +90,8 @@ static struct ctl_table portals_table[PORTALS_PRIMARY_CTLCNT + 1] = { {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall, sizeof(portals_upcall), 0644, NULL, &proc_dostring, &sysctl_string}, + {PSDEV_PORTALS_MEMUSED, "memused", (int *)&portal_kmemory.counter, + sizeof(int), 
0644, NULL, &proc_dointvec}, {0} }; diff --git a/lnet/lnet/.cvsignore b/lnet/lnet/.cvsignore index 95973d6..5ed596b 100644 --- a/lnet/lnet/.cvsignore +++ b/lnet/lnet/.cvsignore @@ -1,4 +1,10 @@ .deps Makefile -Makefile.in -.*.o.cmd +.*.cmd +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.tmp_versions +.depend diff --git a/lnet/lnet/Makefile.in b/lnet/lnet/Makefile.in new file mode 100644 index 0000000..6ce334b --- /dev/null +++ b/lnet/lnet/Makefile.in @@ -0,0 +1,6 @@ +MODULES := portals +portals-objs := api-eq.o api-init.o api-me.o api-errno.o api-ni.o api-wrap.o +portals-objs += lib-dispatch.o lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o +portals-objs += lib-move.o lib-ni.o lib-pid.o module.o + +@INCLUDE_RULES@ diff --git a/lnet/lnet/api-eq.c b/lnet/lnet/api-eq.c index 390156a..0306043 100644 --- a/lnet/lnet/api-eq.c +++ b/lnet/lnet/api-eq.c @@ -23,30 +23,9 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define DEBUG_SUBSYSTEM S_PORTALS #include -int ptl_eq_init(void) -{ - /* Nothing to do anymore... */ - return PTL_OK; -} - -void ptl_eq_fini(void) -{ - /* Nothing to do anymore... */ -} - -int ptl_eq_ni_init(nal_t * nal) -{ - /* Nothing to do anymore... */ - return PTL_OK; -} - -void ptl_eq_ni_fini(nal_t * nal) -{ - /* Nothing to do anymore... */ -} - int ptl_get_event (ptl_eq_t *eq, ptl_event_t *ev) { int new_index = eq->sequence & (eq->size - 1); diff --git a/lnet/lnet/api-errno.c b/lnet/lnet/api-errno.c index 0e155da..1c01c88 100644 --- a/lnet/lnet/api-errno.c +++ b/lnet/lnet/api-errno.c @@ -36,8 +36,7 @@ const char *ptl_err_str[] = { "PTL_MD_NO_UPDATE", "PTL_FAIL", - "PTL_IOV_TOO_MANY", - "PTL_IOV_TOO_SMALL", + "PTL_IOV_INVALID", "PTL_EQ_IN_USE", diff --git a/lnet/lnet/api-init.c b/lnet/lnet/api-init.c index e41bad8..9a98714 100644 --- a/lnet/lnet/api-init.c +++ b/lnet/lnet/api-init.c @@ -23,43 +23,23 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +#define DEBUG_SUBSYSTEM S_PORTALS #include -int ptl_init; - -int __p30_initialized; -int __p30_myr_initialized; -int __p30_ip_initialized; -ptl_handle_ni_t __myr_ni_handle; -ptl_handle_ni_t __ip_ni_handle; - int PtlInit(int *max_interfaces) { if (max_interfaces != NULL) - *max_interfaces = NAL_ENUM_END_MARKER; - - if (ptl_init) - return PTL_OK; + *max_interfaces = NAL_MAX_NR; LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO")); - ptl_ni_init(); - ptl_me_init(); - ptl_eq_init(); - ptl_init = 1; - - return PTL_OK; + return ptl_ni_init(); } void PtlFini(void) { - - /* Reverse order of initialization */ - ptl_eq_fini(); - ptl_me_fini(); ptl_ni_fini(); - ptl_init = 0; } diff --git a/lnet/lnet/api-me.c b/lnet/lnet/api-me.c index e724e58..37f0150 100644 --- a/lnet/lnet/api-me.c +++ b/lnet/lnet/api-me.c @@ -23,20 +23,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define DEBUG_SUBSYSTEM S_PORTALS #include -int ptl_me_init(void) -{ - return PTL_OK; -} -void ptl_me_fini(void) -{ /* Nothing to do */ -} -int ptl_me_ni_init(nal_t * nal) -{ - return PTL_OK; -} - -void ptl_me_ni_fini(nal_t * nal) -{ /* Nothing to do... */ -} diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 02082c6..4f37d13 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -23,15 +23,39 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +#define DEBUG_SUBSYSTEM S_PORTALS #include +int ptl_init; + /* Put some magic in the NI handle so uninitialised/zeroed handles are easy * to spot */ #define NI_HANDLE_MAGIC 0xebc0de00 #define NI_HANDLE_MASK 0x000000ff -#define MAX_NIS 8 -static nal_t *ptl_interfaces[MAX_NIS]; -int ptl_num_interfaces = 0; + +static struct nal_t *ptl_nal_table[NAL_MAX_NR]; + +#ifdef __KERNEL__ +DECLARE_MUTEX(ptl_mutex); + +static void ptl_mutex_enter (void) +{ + down (&ptl_mutex); +} + +static void ptl_mutex_exit (void) +{ + up (&ptl_mutex); +} +#else +static void ptl_mutex_enter (void) +{ +} + +static void ptl_mutex_exit (void) +{ +} +#endif nal_t *ptl_hndl2nal(ptl_handle_any_t *handle) { @@ -46,147 +70,188 @@ nal_t *ptl_hndl2nal(ptl_handle_any_t *handle) return NULL; idx &= NI_HANDLE_MASK; - if (idx < MAX_NIS) - return ptl_interfaces[idx]; + + if (idx >= NAL_MAX_NR || + ptl_nal_table[idx] == NULL || + ptl_nal_table[idx]->nal_refct == 0) + return NULL; - return NULL; + return ptl_nal_table[idx]; } -int ptl_ni_init(void) +int ptl_register_nal (ptl_interface_t interface, nal_t *nal) { - int i; - - LASSERT (MAX_NIS <= (NI_HANDLE_MASK + 1)); + int rc; - for (i = 0; i < MAX_NIS; i++) - ptl_interfaces[i] = NULL; + ptl_mutex_enter(); + + if (interface < 0 || interface >= NAL_MAX_NR) + rc = PTL_IFACE_INVALID; + else if (ptl_nal_table[interface] != NULL) + rc = PTL_IFACE_DUP; + else { + rc = PTL_OK; + ptl_nal_table[interface] = nal; + LASSERT(nal->nal_refct == 0); + } - return PTL_OK; + ptl_mutex_exit(); + return (rc); } -void ptl_ni_fini(void) +void ptl_unregister_nal (ptl_interface_t interface) { - int i; - - for (i = 0; i < MAX_NIS; i++) { - nal_t *nal = ptl_interfaces[i]; - if (!nal) - continue; + LASSERT(interface >= 0 && interface < NAL_MAX_NR); + LASSERT(ptl_nal_table[interface] != NULL); + LASSERT(ptl_nal_table[interface]->nal_refct == 0); + + ptl_mutex_enter(); + + ptl_nal_table[interface] = NULL; - if (nal->shutdown) - nal->shutdown(nal, i); - } + ptl_mutex_exit(); } -#ifdef 
__KERNEL__ -DECLARE_MUTEX(ptl_ni_init_mutex); - -static void ptl_ni_init_mutex_enter (void) +int ptl_ni_init(void) { - down (&ptl_ni_init_mutex); -} + /* If this assertion fails, we need more bits in NI_HANDLE_MASK and + * to shift NI_HANDLE_MAGIC left appropriately */ + LASSERT (NAL_MAX_NR <= (NI_HANDLE_MASK + 1)); + + ptl_mutex_enter(); + + if (!ptl_init) { + /* NULL pointers, clear flags */ + memset(ptl_nal_table, 0, sizeof(ptl_nal_table)); +#ifndef __KERNEL__ + /* Kernel NALs register themselves when their module loads, + * and unregister themselves when their module is unloaded. + * Userspace NALs, are plugged in explicitly here... */ + { + extern nal_t procapi_nal; + + /* XXX pretend it's socknal to keep liblustre happy... */ + ptl_nal_table[SOCKNAL] = &procapi_nal; + LASSERT (procapi_nal.nal_refct == 0); + } +#endif + ptl_init = 1; + } -static void ptl_ni_init_mutex_exit (void) -{ - up (&ptl_ni_init_mutex); + ptl_mutex_exit(); + + return PTL_OK; } -#else -static void ptl_ni_init_mutex_enter (void) +void ptl_ni_fini(void) { -} + nal_t *nal; + int i; + + ptl_mutex_enter(); + + if (ptl_init) { + for (i = 0; i < NAL_MAX_NR; i++) { + + nal = ptl_nal_table[i]; + if (nal == NULL) + continue; + + if (nal->nal_refct != 0) { + CWARN("NAL %d has outstanding refcount %d\n", + i, nal->nal_refct); + nal->shutdown(nal); + } + + ptl_nal_table[i] = NULL; + } -static void ptl_ni_init_mutex_exit (void) -{ + ptl_init = 0; + } + + ptl_mutex_exit(); } -#endif - -int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, ptl_pid_t requested_pid, - ptl_handle_ni_t * handle) +int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid, + ptl_ni_limits_t *desired_limits, ptl_ni_limits_t *actual_limits, + ptl_handle_ni_t *handle) { nal_t *nal; - int i; + int i; + int rc; if (!ptl_init) return PTL_NO_INIT; - ptl_ni_init_mutex_enter (); + ptl_mutex_enter (); - nal = interface(ptl_num_interfaces, ptl_size, acl_size, requested_pid); - - if (!nal) { - 
ptl_ni_init_mutex_exit (); - return PTL_NAL_FAILED; + if (interface == PTL_IFACE_DEFAULT) { + for (i = 0; i < NAL_MAX_NR; i++) + if (ptl_nal_table[i] != NULL) { + interface = i; + break; + } + /* NB if no interfaces are registered, 'interface' will + * fail the valid test below */ } - - for (i = 0; i < ptl_num_interfaces; i++) { - if (ptl_interfaces[i] == nal) { - nal->refct++; - handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | i; - CDEBUG(D_OTHER, "Returning existing NAL (%d)\n", i); - ptl_ni_init_mutex_exit (); - return PTL_OK; - } + + if (interface < 0 || + interface >= NAL_MAX_NR || + ptl_nal_table[interface] == NULL) { + GOTO(out, rc = PTL_IFACE_INVALID); } - nal->refct = 1; - if (ptl_num_interfaces >= MAX_NIS) { - if (nal->shutdown) - nal->shutdown (nal, ptl_num_interfaces); - ptl_ni_init_mutex_exit (); - return PTL_NO_SPACE; - } + nal = ptl_nal_table[interface]; - handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | ptl_num_interfaces; - ptl_interfaces[ptl_num_interfaces++] = nal; + CDEBUG(D_OTHER, "Starting up NAL (%d) refs %d\n", interface, nal->nal_refct); + rc = nal->startup(nal, requested_pid, desired_limits, actual_limits); - ptl_eq_ni_init(nal); - ptl_me_ni_init(nal); + if (rc != PTL_OK) { + CERROR("Error %d starting up NAL %d, refs %d\n", rc, + interface, nal->nal_refct); + GOTO(out, rc); + } + + if (nal->nal_refct != 0) { + /* Caller gets to know if this was the first ref or not */ + rc = PTL_IFACE_DUP; + } + + nal->nal_refct++; + handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface; - ptl_ni_init_mutex_exit (); - return PTL_OK; + out: + ptl_mutex_exit (); + return rc; } - int PtlNIFini(ptl_handle_ni_t ni) { nal_t *nal; - int idx; - int rc; + int idx; if (!ptl_init) return PTL_NO_INIT; - ptl_ni_init_mutex_enter (); + ptl_mutex_enter (); nal = ptl_hndl2nal (&ni); if (nal == NULL) { - ptl_ni_init_mutex_exit (); + ptl_mutex_exit (); return PTL_HANDLE_INVALID; } idx = ni.nal_idx & NI_HANDLE_MASK; - nal->refct--; - if (nal->refct > 
0) { - ptl_ni_init_mutex_exit (); - return PTL_OK; - } - - ptl_me_ni_fini(nal); - ptl_eq_ni_fini(nal); + LASSERT(nal->nal_refct > 0); - rc = PTL_OK; - if (nal->shutdown) - rc = nal->shutdown(nal, idx); + nal->nal_refct--; - ptl_interfaces[idx] = NULL; - ptl_num_interfaces--; + /* nal_refct == 0 tells nal->shutdown to really shut down */ + nal->shutdown(nal); - ptl_ni_init_mutex_exit (); - return rc; + ptl_mutex_exit (); + return PTL_OK; } int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * ni_out) diff --git a/lnet/lnet/api-wrap.c b/lnet/lnet/api-wrap.c index 9c82c30..3e6f9ce 100644 --- a/lnet/lnet/api-wrap.c +++ b/lnet/lnet/api-wrap.c @@ -124,25 +124,6 @@ int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in, return ret.rc; } - - -unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in) -{ - PtlNIDebug_in args; - PtlNIDebug_out ret; - int rc; - - args.mask_in = mask_in; - - rc = do_forward(ni, PTL_NIDEBUG, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - return ret.rc; -} - int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in, ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in, @@ -255,45 +236,6 @@ int PtlMEDump(ptl_handle_me_t current_in) return ret.rc; } -static int validate_md(ptl_handle_any_t current_in, ptl_md_t md_in) -{ - nal_t *nal; - int rc; - int i; - - if (!ptl_init) { - CERROR("PtlMDAttach/Bind/Update: Not initialized\n"); - return PTL_NO_INIT; - } - - nal = ptl_hndl2nal(¤t_in); - if (!nal) - return PTL_HANDLE_INVALID; - - if (nal->validate != NULL) /* nal->validate not a NOOP */ - { - if ((md_in.options & PTL_MD_IOVEC) == 0) /* contiguous */ - { - rc = nal->validate (nal, md_in.start, md_in.length); - if (rc) - return (PTL_SEGV); - } - else - { - struct iovec *iov = (struct iovec *)md_in.start; - - for (i = 0; i < md_in.niov; i++, iov++) - { - rc = nal->validate (nal, iov->iov_base, iov->iov_len); - 
if (rc) - return (PTL_SEGV); - } - } - } - - return 0; -} - static ptl_handle_eq_t md2eq (ptl_md_t *md) { if (PtlHandleIsEqual (md->eventq, PTL_EQ_NONE)) @@ -310,16 +252,13 @@ int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in, PtlMDAttach_out ret; int rc; - rc = validate_md(me_in, md_in); - if (rc == PTL_OK) { - args.eq_in = md2eq(&md_in); - args.me_in = me_in; - args.md_in = md_in; - args.unlink_in = unlink_in; + args.eq_in = md2eq(&md_in); + args.me_in = me_in; + args.md_in = md_in; + args.unlink_in = unlink_in; - rc = do_forward(me_in, PTL_MDATTACH, - &args, sizeof(args), &ret, sizeof(ret)); - } + rc = do_forward(me_in, PTL_MDATTACH, + &args, sizeof(args), &ret, sizeof(ret)); if (rc != PTL_OK) return (rc == PTL_HANDLE_INVALID) ? PTL_ME_INVALID : rc; @@ -340,10 +279,6 @@ int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in, PtlMDBind_out ret; int rc; - rc = validate_md(ni_in, md_in); - if (rc != PTL_OK) - return rc; - args.eq_in = md2eq(&md_in); args.ni_in = ni_in; args.md_in = md_in; @@ -378,9 +313,6 @@ int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout, args.old_inout_valid = 0; if (new_inout) { - rc = validate_md (md_in, *new_inout); - if (rc != PTL_OK) - return (rc == PTL_HANDLE_INVALID) ? 
PTL_MD_INVALID : rc; args.new_inout = *new_inout; args.new_inout_valid = 1; } else @@ -423,7 +355,7 @@ int PtlMDUnlink(ptl_handle_md_t md_in) } int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count, - int (*callback) (ptl_event_t * event), + ptl_eq_handler_t callback, ptl_handle_eq_t * handle_out) { ptl_eq_t *eq = NULL; @@ -458,12 +390,6 @@ int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count, for (i = 0; i < count; i++) ev[i].sequence = 0; - if (nal->validate != NULL) { - rc = nal->validate(nal, ev, count * sizeof(ptl_event_t)); - if (rc != PTL_OK) - goto fail; - } - args.ni_in = interface; args.count_in = count; args.base_in = ev; diff --git a/lnet/lnet/Makefile.am b/lnet/lnet/autoMakefile.am similarity index 57% rename from lnet/lnet/Makefile.am rename to lnet/lnet/autoMakefile.am index 59f9dd9..bf7a107 100644 --- a/lnet/lnet/Makefile.am +++ b/lnet/lnet/autoMakefile.am @@ -3,30 +3,24 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution -DEFS = - my_sources = api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c \ lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c \ lib-md.c lib-move.c lib-ni.c lib-pid.c -noinst_LIBRARIES= libportals.a -libportals_a_SOURCES= $(my_sources) +if !CRAY_PORTALS if LIBLUSTRE -libportals_a_CFLAGS= -fPIC -else - -MODULE = portals -modulenet_DATA = portals.o -EXTRA_PROGRAMS = portals - -DISTCLEANFILES = *.orig *.rej +noinst_LIBRARIES= libportals.a +libportals_a_SOURCES= $(my_sources) +libportals_a_CPPFLAGS = $(LLCPPFLAGS) +libportals_a_CFLAGS = $(LLCFLAGS) +endif -portals_SOURCES= module.c $(my_sources) +if MODULES +modulenet_DATA = portals$(KMODEXT) +endif # MODULES -# Don't distribute any patched files. 
-dist-hook: - list='$(EXT2C)'; for f in $$list; do rm -f $(distdir)/$$f; done +endif # CRAY_PORTALS -include ../Rules.linux -endif +MOSTLYCLEANFILES = *.o *.ko *.mod.c +DIST_SOURCES = $(portals-objs:%.o=%.c) diff --git a/lnet/lnet/lib-dispatch.c b/lnet/lnet/lib-dispatch.c index 13036c7..798e117 100644 --- a/lnet/lnet/lib-dispatch.c +++ b/lnet/lnet/lib-dispatch.c @@ -35,7 +35,6 @@ static dispatch_table_t dispatch_table[] = { [PTL_GETID] {do_PtlGetId, "PtlGetId"}, [PTL_NISTATUS] {do_PtlNIStatus, "PtlNIStatus"}, [PTL_NIDIST] {do_PtlNIDist, "PtlNIDist"}, - [PTL_NIDEBUG] {do_PtlNIDebug, "PtlNIDebug"}, [PTL_MEATTACH] {do_PtlMEAttach, "PtlMEAttach"}, [PTL_MEINSERT] {do_PtlMEInsert, "PtlMEInsert"}, [PTL_MEUNLINK] {do_PtlMEUnlink, "PtlMEUnlink"}, diff --git a/lnet/lnet/lib-init.c b/lnet/lnet/lib-init.c index 61ef465..c62dbc2 100644 --- a/lnet/lnet/lib-init.c +++ b/lnet/lnet/lib-init.c @@ -41,8 +41,15 @@ #ifndef PTL_USE_LIB_FREELIST int -kportal_descriptor_setup (nal_cb_t *nal) +kportal_descriptor_setup (nal_cb_t *nal, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { + /* Ignore requested limits! */ + actual_limits->max_mes = INT_MAX; + actual_limits->max_mds = INT_MAX; + actual_limits->max_eqs = INT_MAX; + return PTL_OK; } @@ -100,7 +107,9 @@ lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl) } int -kportal_descriptor_setup (nal_cb_t *nal) +kportal_descriptor_setup (nal_cb_t *nal, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { /* NB on failure caller must still call kportal_descriptor_cleanup */ /* ****** */ @@ -111,6 +120,13 @@ kportal_descriptor_setup (nal_cb_t *nal) memset (&nal->ni.ni_free_mds, 0, sizeof (nal->ni.ni_free_mds)); memset (&nal->ni.ni_free_eqs, 0, sizeof (nal->ni.ni_free_eqs)); + /* Ignore requested limits! */ + actual_limits->max_mes = MAX_MES; + actual_limits->max_mds = MAX_MDS; + actual_limits->max_eqs = MAX_EQS; + /* Hahahah what a load of bollocks. 
There's nowhere to + * specify the max # messages in-flight */ + rc = lib_freelist_init (nal, &nal->ni.ni_free_mes, MAX_MES, sizeof (lib_me_t)); if (rc != PTL_OK) @@ -248,21 +264,18 @@ lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh) } int -lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t ptl_size, ptl_ac_index_t acl_size) +lib_init(nal_cb_t *nal, ptl_process_id_t process_id, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { int rc = PTL_OK; lib_ni_t *ni = &nal->ni; + int ptl_size; int i; ENTRY; /* NB serialised in PtlNIInit() */ - if (ni->refcnt != 0) { /* already initialised */ - ni->refcnt++; - goto out; - } - lib_assert_wire_constants (); /* @@ -271,7 +284,8 @@ lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, */ memset(&ni->counters, 0, sizeof(lib_counters_t)); - rc = kportal_descriptor_setup (nal); + rc = kportal_descriptor_setup (nal, requested_limits, + &ni->actual_limits); if (rc != PTL_OK) goto out; @@ -287,12 +301,15 @@ lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, if (rc != PTL_OK) goto out; - ni->nid = nid; - ni->pid = pid; + ni->nid = process_id.nid; + ni->pid = process_id.pid; - ni->num_nodes = gsize; - ni->tbl.size = ptl_size; + if (requested_limits != NULL) + ptl_size = requested_limits->max_pt_index + 1; + else + ptl_size = 64; + ni->tbl.size = ptl_size; ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size); if (ni->tbl.tbl == NULL) { rc = PTL_NO_SPACE; @@ -302,9 +319,20 @@ lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, for (i = 0; i < ptl_size; i++) INIT_LIST_HEAD(&(ni->tbl.tbl[i])); - ni->debug = PTL_DEBUG_NONE; - ni->up = 1; - ni->refcnt++; + /* max_{mes,mds,eqs} set in kportal_descriptor_setup */ + + /* We don't have an access control table! 
*/ + ni->actual_limits.max_ac_index = -1; + + ni->actual_limits.max_pt_index = ptl_size - 1; + ni->actual_limits.max_md_iovecs = PTL_MD_MAX_IOV; + ni->actual_limits.max_me_list = INT_MAX; + + /* We don't support PtlGetPut! */ + ni->actual_limits.max_getput_md = 0; + + if (actual_limits != NULL) + *actual_limits = ni->actual_limits; out: if (rc != PTL_OK) { @@ -321,12 +349,7 @@ lib_fini(nal_cb_t * nal) lib_ni_t *ni = &nal->ni; int idx; - ni->refcnt--; - - if (ni->refcnt != 0) - goto out; - - /* NB no stat_lock() since this is the last reference. The NAL + /* NB no state_lock() since this is the last reference. The NAL * should have shut down already, so it should be safe to unlink * and free all descriptors, even those that appear committed to a * network op (eg MD with non-zero pending count) @@ -370,11 +393,9 @@ lib_fini(nal_cb_t * nal) } nal->cb_free(nal, ni->tbl.tbl, sizeof(struct list_head) * ni->tbl.size); - ni->up = 0; lib_cleanup_handle_hash (nal); kportal_descriptor_cleanup (nal); - out: return (PTL_OK); } diff --git a/lnet/lnet/lib-md.c b/lnet/lnet/lib-md.c index 9a391cd..64a55b9 100644 --- a/lnet/lnet/lib-md.c +++ b/lnet/lnet/lib-md.c @@ -86,6 +86,7 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, lib_eq_t *eq = NULL; int rc; int i; + int niov; /* NB we are passed an allocated, but uninitialised/active md. * if we return success, caller may lib_md_unlink() it. @@ -101,7 +102,7 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, /* Must check this _before_ allocation. Also, note that non-iov * MDs must set md_niov to 0. */ LASSERT((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0 || - md->niov <= PTL_MD_MAX_IOV); + md->length <= PTL_MD_MAX_IOV); /* This implementation doesn't know how to create START events or * disable END events. 
Best to LASSERT our caller is compliant so @@ -116,7 +117,6 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, new->me = NULL; new->start = md->start; - new->length = md->length; new->offset = 0; new->max_size = md->max_size; new->options = md->options; @@ -132,13 +132,13 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */ return PTL_MD_INVALID; - new->md_niov = md->niov; + new->md_niov = niov = md->length; if (nal->cb_read (nal, private, new->md_iov.iov, md->start, - md->niov * sizeof (new->md_iov.iov[0]))) + niov * sizeof (new->md_iov.iov[0]))) return PTL_SEGV; - for (i = 0; i < new->md_niov; i++) { + for (i = 0; i < niov; i++) { /* We take the base address on trust */ if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */ return PTL_VAL_FAILED; @@ -146,11 +146,10 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, total_length += new->md_iov.iov[i].iov_len; } - if (md->length > total_length) - return PTL_IOV_TOO_SMALL; - + new->length = total_length; + if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, + rc = nal->cb_map (nal, niov, new->md_iov.iov, &new->md_addrkey); if (rc != PTL_OK) return (rc); @@ -166,13 +165,13 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, nal->cb_recv_pages == NULL) return PTL_MD_INVALID; - new->md_niov = md->niov; + new->md_niov = niov = md->length; if (nal->cb_read (nal, private, new->md_iov.kiov, md->start, - md->niov * sizeof (new->md_iov.kiov[0]))) + niov * sizeof (new->md_iov.kiov[0]))) return PTL_SEGV; - for (i = 0; i < new->md_niov; i++) { + for (i = 0; i < niov; i++) { /* We take the page pointer on trust */ if (new->md_iov.kiov[i].kiov_offset + new->md_iov.kiov[i].kiov_len > PAGE_SIZE ) @@ -181,23 +180,23 @@ static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, total_length += new->md_iov.kiov[i].kiov_len; } - if (md->length > 
total_length) - return PTL_IOV_TOO_SMALL; + new->length = total_length; if (nal->cb_map_pages != NULL) { - rc = nal->cb_map_pages (nal, new->md_niov, new->md_iov.kiov, + rc = nal->cb_map_pages (nal, niov, new->md_iov.kiov, &new->md_addrkey); if (rc != PTL_OK) return (rc); } #endif } else { /* contiguous */ - new->md_niov = 1; + new->length = md->length; + new->md_niov = niov = 1; new->md_iov.iov[0].iov_base = md->start; new->md_iov.iov[0].iov_len = md->length; if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, + rc = nal->cb_map (nal, niov, new->md_iov.iov, &new->md_addrkey); if (rc != PTL_OK) return (rc); @@ -223,13 +222,13 @@ void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new) * and that's all. */ new->start = md->start; - new->length = md->length; + new->length = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? + md->length : md->md_niov; new->threshold = md->threshold; new->max_size = md->max_size; new->options = md->options; new->user_ptr = md->user_ptr; ptl_eq2handle(&new->eventq, md->eq); - new->niov = ((md->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ? 
0 : md->md_niov; } int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) @@ -251,8 +250,8 @@ int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) unsigned long flags; if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && - args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */ - return (ret->rc = PTL_IOV_TOO_MANY); + args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */ + return (ret->rc = PTL_IOV_INVALID); md = lib_md_alloc(nal, &args->md_in); if (md == NULL) @@ -303,8 +302,8 @@ int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret) unsigned long flags; if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 && - args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */ - return (ret->rc = PTL_IOV_TOO_MANY); + args->md_in.length > PTL_MD_MAX_IOV) /* too many fragments */ + return (ret->rc = PTL_IOV_INVALID); md = lib_md_alloc(nal, &args->md_in); if (md == NULL) @@ -407,23 +406,16 @@ int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args, goto out; } - /* XXX fttb, the new MD must be the same type wrt fragmentation */ - if (((new->options ^ md->options) & - (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) { - ret->rc = PTL_MD_INVALID; - goto out; - } - - if (new->niov > md->md_niov) { - ret->rc = PTL_IOV_TOO_MANY; + /* XXX fttb, the new MD must be the same "shape" wrt fragmentation, + * since we simply overwrite the old lib-md */ + if ((((new->options ^ md->options) & + (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) || + ((new->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 && + new->length != md->md_niov)) { + ret->rc = PTL_IOV_INVALID; goto out; } - if (new->niov < md->md_niov) { - ret->rc = PTL_IOV_TOO_SMALL; - goto out; - } - if (!PtlHandleIsEqual (args->testq_in, PTL_EQ_NONE)) { test_eq = ptl_handle2eq(&args->testq_in, nal); if (test_eq == NULL) { diff --git a/lnet/lnet/lib-me.c b/lnet/lnet/lib-me.c index e3c46ea..271fc82 100644 --- a/lnet/lnet/lib-me.c +++ 
b/lnet/lnet/lib-me.c @@ -146,13 +146,6 @@ int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret) /* call with state_lock please */ void lib_me_unlink(nal_cb_t *nal, lib_me_t *me) { - lib_ni_t *ni = &nal->ni; - - if (ni->debug & PTL_DEBUG_UNLINK) { - ptl_handle_any_t handle; - ptl_me2handle(&handle, me); - } - list_del (&me->me_list); if (me->md) { diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c index 869c9d6..1b69533 100644 --- a/lnet/lnet/lib-msg.c +++ b/lnet/lnet/lib-msg.c @@ -89,10 +89,6 @@ lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status) int rc; ptl_hdr_t ack; - /* ni went down while processing this message */ - if (nal->ni.up == 0) - return; - if (msg == NULL) return; diff --git a/lnet/lnet/lib-ni.c b/lnet/lnet/lib-ni.c index 296bc4a..aa959fc 100644 --- a/lnet/lnet/lib-ni.c +++ b/lnet/lnet/lib-ni.c @@ -29,18 +29,6 @@ #define MAX_DIST 18446744073709551615ULL -int do_PtlNIDebug(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlNIDebug_in *args = v_args; - PtlNIDebug_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - - ret->rc = ni->debug; - ni->debug = args->mask_in; - - return 0; -} - int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret) { /* diff --git a/lnet/lnet/module.c b/lnet/lnet/module.c index 012d3d9..40e9da4 100644 --- a/lnet/lnet/module.c +++ b/lnet/lnet/module.c @@ -46,273 +46,17 @@ #include #include +#include #include #include #include extern void (kping_client)(struct portal_ioctl_data *); -struct nal_cmd_handler { - nal_cmd_handler_t nch_handler; - void * nch_private; -}; - -static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1]; -static DECLARE_MUTEX(nal_cmd_sem); - - -static int -kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid) -{ - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET (kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = 
ci->kprci_add_route (gateway_nalid, gateway_nid, lo_nid, hi_nid); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); -} - -static int -kportal_del_route(int gw_nalid, ptl_nid_t gw_nid, - ptl_nid_t lo, ptl_nid_t hi) -{ - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_del_route (gw_nalid, gw_nid, lo, hi); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); -} - -static int -kportal_notify_router (int gw_nalid, ptl_nid_t gw_nid, - int alive, time_t when) -{ - int rc; - kpr_control_interface_t *ci; - - /* No error if router not preset. Sysadmin is allowed to notify - * _everywhere_ when a NID boots or crashes, even if they know - * nothing of the peer. */ - ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (0); - - rc = ci->kprci_notify (gw_nalid, gw_nid, alive, when); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); -} - -static int -kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp, - ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp, int *alivep) -{ - int gateway_nalid; - ptl_nid_t gateway_nid; - ptl_nid_t lo_nid; - ptl_nid_t hi_nid; - int alive; - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid, - &lo_nid, &hi_nid, &alive); - - if (rc == 0) { - CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64", %s\n", - index, gateway_nalid, gateway_nid, lo_nid, hi_nid, - alive ? 
"up" : "down"); - - *gateway_nalidp = (__u32)gateway_nalid; - *gateway_nidp = gateway_nid; - *lo_nidp = lo_nid; - *hi_nidp = hi_nid; - *alivep = alive; - } - - PORTAL_SYMBOL_PUT (kpr_control_interface); - return (rc); -} - -static int -kportal_router_cmd(struct portals_cfg *pcfg, void * private) -{ - int err = -EINVAL; - ENTRY; - - switch(pcfg->pcfg_command) { - default: - CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command); - break; - - case NAL_CMD_ADD_ROUTE: - CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n", - pcfg->pcfg_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - err = kportal_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - break; - - case NAL_CMD_DEL_ROUTE: - CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n", - pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - err = kportal_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_nid2, pcfg->pcfg_nid3); - break; - - case NAL_CMD_NOTIFY_ROUTER: { - CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n", - pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_flags ? 
"Enabling" : "Disabling", - (time_t)pcfg->pcfg_nid3); - - err = kportal_notify_router (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, - pcfg->pcfg_flags, - (time_t)pcfg->pcfg_nid3); - break; - } - - case NAL_CMD_GET_ROUTE: - CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count); - err = kportal_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal, - &pcfg->pcfg_nid, - &pcfg->pcfg_nid2, &pcfg->pcfg_nid3, - &pcfg->pcfg_flags); - break; - } - RETURN(err); -} - -int -kportal_nal_cmd(struct portals_cfg *pcfg) -{ - __u32 nal = pcfg->pcfg_nal; - int rc = -EINVAL; - - ENTRY; - - down(&nal_cmd_sem); - if (nal > 0 && nal <= NAL_MAX_NR && nal_cmd[nal].nch_handler) { - CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, - pcfg->pcfg_command); - rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private); - } - up(&nal_cmd_sem); - RETURN(rc); -} - -ptl_handle_ni_t * -kportal_get_ni (int nal) -{ - - switch (nal) - { - case QSWNAL: - return (PORTAL_SYMBOL_GET(kqswnal_ni)); - case SOCKNAL: - return (PORTAL_SYMBOL_GET(ksocknal_ni)); - case GMNAL: - return (PORTAL_SYMBOL_GET(kgmnal_ni)); - case IBNAL: - return (PORTAL_SYMBOL_GET(kibnal_ni)); - case TCPNAL: - /* userspace NAL */ - return (NULL); - case SCIMACNAL: - return (PORTAL_SYMBOL_GET(kscimacnal_ni)); - default: - /* A warning to a naive caller */ - CERROR ("unknown nal: %d\n", nal); - return (NULL); - } -} - -void -kportal_put_ni (int nal) -{ - - switch (nal) - { - case QSWNAL: - PORTAL_SYMBOL_PUT(kqswnal_ni); - break; - case SOCKNAL: - PORTAL_SYMBOL_PUT(ksocknal_ni); - break; - case GMNAL: - PORTAL_SYMBOL_PUT(kgmnal_ni); - break; - case IBNAL: - PORTAL_SYMBOL_PUT(kibnal_ni); - break; - case TCPNAL: - /* A lesson to a malicious caller */ - LBUG (); - case SCIMACNAL: - PORTAL_SYMBOL_PUT(kscimacnal_ni); - break; - default: - CERROR ("unknown nal: %d\n", nal); - } -} - -int -kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private) -{ - int rc = 0; - - CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler); - 
- if (nal > 0 && nal <= NAL_MAX_NR) { - down(&nal_cmd_sem); - if (nal_cmd[nal].nch_handler != NULL) - rc = -EBUSY; - else { - nal_cmd[nal].nch_handler = handler; - nal_cmd[nal].nch_private = private; - } - up(&nal_cmd_sem); - } - return rc; -} - -int -kportal_nal_unregister(int nal) -{ - int rc = 0; - - CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal); - - if (nal > 0 && nal <= NAL_MAX_NR) { - down(&nal_cmd_sem); - nal_cmd[nal].nch_handler = NULL; - nal_cmd[nal].nch_private = NULL; - up(&nal_cmd_sem); - } - return rc; -} - static int kportal_ioctl(struct portal_ioctl_data *data, unsigned int cmd, unsigned long arg) { - int err = 0; + int err; char str[PTL_NALFMT_SIZE]; ENTRY; @@ -334,68 +78,53 @@ static int kportal_ioctl(struct portal_ioctl_data *data, } case IOC_PORTAL_GET_NID: { - const ptl_handle_ni_t *nip; - ptl_process_id_t pid; + ptl_handle_ni_t nih; + ptl_process_id_t pid; CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal); - nip = kportal_get_ni (data->ioc_nal); - if (nip == NULL) + err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih); + if (!(err == PTL_OK || err == PTL_IFACE_DUP)) RETURN (-EINVAL); - err = PtlGetId (*nip, &pid); + err = PtlGetId (nih, &pid); LASSERT (err == PTL_OK); - kportal_put_ni (data->ioc_nal); + + PtlNIFini(nih); data->ioc_nid = pid.nid; if (copy_to_user ((char *)arg, data, sizeof (*data))) - err = -EFAULT; - break; + RETURN (-EFAULT); + RETURN(0); } - case IOC_PORTAL_NAL_CMD: { - struct portals_cfg pcfg; - - LASSERT (data->ioc_plen1 == sizeof(pcfg)); - err = copy_from_user(&pcfg, (void *)data->ioc_pbuf1, - sizeof(pcfg)); - if ( err ) { - EXIT; - return err; - } - - CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal, - pcfg.pcfg_command); - err = kportal_nal_cmd(&pcfg); - if (err == 0) { - if (copy_to_user((char *)data->ioc_pbuf1, &pcfg, - sizeof (pcfg))) - err = -EFAULT; - if (copy_to_user((char *)arg, data, sizeof (*data))) - err = -EFAULT; - } - break; - } case IOC_PORTAL_FAIL_NID: { - const ptl_handle_ni_t *nip; 
+ ptl_handle_ni_t nih; CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n", data->ioc_nal, data->ioc_nid, data->ioc_count); - nip = kportal_get_ni (data->ioc_nal); - if (nip == NULL) + err = PtlNIInit(data->ioc_nal, 0, NULL, NULL, &nih); + if (!(err == PTL_OK || err == PTL_IFACE_DUP)) return (-EINVAL); - err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count); - kportal_put_ni (data->ioc_nal); - break; + if (err == PTL_OK) { + /* There's no point in failing an interface that + * came into existance just for this */ + err = -EINVAL; + } else { + err = PtlFailNid (nih, data->ioc_nid, data->ioc_count); + if (err != PTL_OK) + err = -EINVAL; + } + + PtlNIFini(nih); + RETURN (err); } default: - err = -EINVAL; - break; + RETURN(-EINVAL); } - - RETURN(err); + /* Not Reached */ } DECLARE_IOCTL_HANDLER(kportal_ioctl_handler, kportal_ioctl); @@ -411,30 +140,24 @@ static int init_kportals_module(void) RETURN(rc); } - rc = kportal_nal_register(ROUTER, kportal_router_cmd, NULL); - if (rc) { - PtlFini(); - CERROR("kportal_nal_registre: ROUTER error %d\n", rc); - } - - if (rc == 0) - libcfs_register_ioctl(&kportal_ioctl_handler); + rc = libcfs_register_ioctl(&kportal_ioctl_handler); + LASSERT (rc == 0); RETURN(rc); } static void exit_kportals_module(void) { - libcfs_deregister_ioctl(&kportal_ioctl_handler); - kportal_nal_unregister(ROUTER); + int rc; + + rc = libcfs_deregister_ioctl(&kportal_ioctl_handler); + LASSERT (rc == 0); + PtlFini(); } -EXPORT_SYMBOL(kportal_nal_register); -EXPORT_SYMBOL(kportal_nal_unregister); -EXPORT_SYMBOL(kportal_get_ni); -EXPORT_SYMBOL(kportal_put_ni); -EXPORT_SYMBOL(kportal_nal_cmd); +EXPORT_SYMBOL(ptl_register_nal); +EXPORT_SYMBOL(ptl_unregister_nal); EXPORT_SYMBOL(ptl_err_str); EXPORT_SYMBOL(lib_dispatch); @@ -446,7 +169,6 @@ EXPORT_SYMBOL(PtlMDAttach); EXPORT_SYMBOL(PtlMDUnlink); EXPORT_SYMBOL(PtlNIInit); EXPORT_SYMBOL(PtlNIFini); -EXPORT_SYMBOL(PtlNIDebug); EXPORT_SYMBOL(PtlInit); EXPORT_SYMBOL(PtlFini); EXPORT_SYMBOL(PtlSnprintHandle); diff 
--git a/lnet/router/.cvsignore b/lnet/router/.cvsignore index 95973d6..5ed596b 100644 --- a/lnet/router/.cvsignore +++ b/lnet/router/.cvsignore @@ -1,4 +1,10 @@ .deps Makefile -Makefile.in -.*.o.cmd +.*.cmd +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.tmp_versions +.depend diff --git a/lnet/router/Makefile.am b/lnet/router/Makefile.am deleted file mode 100644 index 1c8087b..0000000 --- a/lnet/router/Makefile.am +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../Rules.linux - -MODULE = kptlrouter -modulenet_DATA = kptlrouter.o -EXTRA_PROGRAMS = kptlrouter - - -#CFLAGS:= @KCFLAGS@ -#CPPFLAGS:=@KCPPFLAGS@ -DEFS = -kptlrouter_SOURCES = router.c proc.c router.h diff --git a/lnet/router/Makefile.in b/lnet/router/Makefile.in new file mode 100644 index 0000000..3bb6cf7 --- /dev/null +++ b/lnet/router/Makefile.in @@ -0,0 +1,4 @@ +MODULES := kptlrouter +kptlrouter-objs := router.o proc.o + +@INCLUDE_RULES@ diff --git a/lnet/router/autoMakefile.am b/lnet/router/autoMakefile.am new file mode 100644 index 0000000..fa11e8c --- /dev/null +++ b/lnet/router/autoMakefile.am @@ -0,0 +1,13 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. 
+# See the file COPYING in this distribution + +if MODULES +if !CRAY_PORTALS +modulenet_DATA = kptlrouter$(KMODEXT) +endif +endif + +MOSTLYCLEANFILES = *.o *.ko *.mod.c +DIST_SOURCES = $(kptlrouter-objs:%.o=%.c) router.h diff --git a/lnet/router/router.c b/lnet/router/router.c index d0dbf0a..9fb6afe 100644 --- a/lnet/router/router.c +++ b/lnet/router/router.c @@ -48,13 +48,6 @@ kpr_router_interface_t kpr_router_interface = { kprri_deregister: kpr_deregister_nal, }; -kpr_control_interface_t kpr_control_interface = { - kprci_add_route: kpr_add_route, - kprci_del_route: kpr_del_route, - kprci_get_route: kpr_get_route, - kprci_notify: kpr_sys_notify, -}; - int kpr_register_nal (kpr_nal_interface_t *nalif, void **argp) { @@ -289,18 +282,9 @@ kpr_shutdown_nal (void *arg) LASSERT (!ne->kpne_shutdown); LASSERT (!in_interrupt()); - write_lock_irqsave (&kpr_rwlock, flags); /* locking a bit spurious... */ + write_lock_irqsave (&kpr_rwlock, flags); ne->kpne_shutdown = 1; - write_unlock_irqrestore (&kpr_rwlock, flags); /* except it's a memory barrier */ - - while (atomic_read (&ne->kpne_refcount) != 0) - { - CDEBUG (D_NET, "Waiting for refcount on NAL %d to reach zero (%d)\n", - ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount)); - - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } + write_unlock_irqrestore (&kpr_rwlock, flags); } void @@ -312,15 +296,22 @@ kpr_deregister_nal (void *arg) CDEBUG (D_NET, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid); LASSERT (ne->kpne_shutdown); /* caller must have issued shutdown already */ - LASSERT (atomic_read (&ne->kpne_refcount) == 0); /* can't be busy */ LASSERT (!in_interrupt()); write_lock_irqsave (&kpr_rwlock, flags); - list_del (&ne->kpne_list); - write_unlock_irqrestore (&kpr_rwlock, flags); + /* Wait until all outstanding messages/notifications have completed */ + while (atomic_read (&ne->kpne_refcount) != 0) + { + CDEBUG (D_NET, "Waiting for refcount on NAL %d to reach zero (%d)\n", + 
ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount)); + + set_current_state (TASK_UNINTERRUPTIBLE); + schedule_timeout (HZ); + } + PORTAL_FREE (ne, sizeof (*ne)); PORTAL_MODULE_UNUSE; } @@ -377,12 +368,15 @@ kpr_lookup_target (void *arg, ptl_nid_t target_nid, int nob, CDEBUG (D_NET, "lookup "LPX64" from NAL %d\n", target_nid, ne->kpne_interface.kprni_nalid); - - if (ne->kpne_shutdown) /* caller is shutting down */ - return (-ENOENT); + LASSERT (!in_interrupt()); read_lock (&kpr_rwlock); + if (ne->kpne_shutdown) { /* caller is shutting down */ + read_unlock (&kpr_rwlock); + return (-ENOENT); + } + /* Search routes for one that has a gateway to target_nid on the callers network */ list_for_each (e, &kpr_routes) { @@ -452,25 +446,26 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd) struct list_head *e; kpr_route_entry_t *re; kpr_nal_entry_t *tmp_ne; + int rc; CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d\n", fwd, target_nid, src_ne->kpne_interface.kprni_nalid); LASSERT (nob == lib_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov)); - - atomic_inc (&kpr_queue_depth); - atomic_inc (&src_ne->kpne_refcount); /* source nal is busy until fwd completes */ + LASSERT (!in_interrupt()); + + read_lock (&kpr_rwlock); kpr_fwd_packets++; /* (loose) stats accounting */ kpr_fwd_bytes += nob + sizeof(ptl_hdr_t); - if (src_ne->kpne_shutdown) /* caller is shutting down */ + if (src_ne->kpne_shutdown) { /* caller is shutting down */ + rc = -ESHUTDOWN; goto out; + } fwd->kprfd_router_arg = src_ne; /* stash caller's nal entry */ - read_lock (&kpr_rwlock); - /* Search routes for one that has a gateway to target_nid NOT on the caller's network */ list_for_each (e, &kpr_routes) { @@ -507,7 +502,9 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd) kpr_update_weight (ge, nob); fwd->kprfd_gateway_nid = ge->kpge_nid; - atomic_inc (&dst_ne->kpne_refcount); /* dest nal is busy until fwd completes */ + atomic_inc (&src_ne->kpne_refcount); /* source and dest nals are */ + 
atomic_inc (&dst_ne->kpne_refcount); /* busy until fwd completes */ + atomic_inc (&kpr_queue_depth); read_unlock (&kpr_rwlock); @@ -520,18 +517,16 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd) return; } - read_unlock (&kpr_rwlock); + rc = -EHOSTUNREACH; out: kpr_fwd_errors++; - CDEBUG (D_NET, "Failed to forward [%p] "LPX64" from NAL %d\n", fwd, - target_nid, src_ne->kpne_interface.kprni_nalid); + CDEBUG (D_NET, "Failed to forward [%p] "LPX64" from NAL %d: %d\n", + fwd, target_nid, src_ne->kpne_interface.kprni_nalid, rc); - /* Can't find anywhere to forward to */ - (fwd->kprfd_callback)(fwd->kprfd_callback_arg, -EHOSTUNREACH); + (fwd->kprfd_callback)(fwd->kprfd_callback_arg, rc); - atomic_dec (&kpr_queue_depth); - atomic_dec (&src_ne->kpne_refcount); + read_unlock (&kpr_rwlock); } void @@ -635,7 +630,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, int kpr_sys_notify (int gateway_nalid, ptl_nid_t gateway_nid, - int alive, time_t when) + int alive, time_t when) { return (kpr_do_notify (0, gateway_nalid, gateway_nid, alive, when)); } @@ -694,11 +689,12 @@ kpr_del_route (int gw_nalid, ptl_nid_t gw_nid, } int -kpr_get_route (int idx, int *gateway_nalid, ptl_nid_t *gateway_nid, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, int *alive) +kpr_get_route (int idx, __u32 *gateway_nalid, ptl_nid_t *gateway_nid, + ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, __u32 *alive) { struct list_head *e; + LASSERT (!in_interrupt()); read_lock(&kpr_rwlock); for (e = kpr_routes.next; e != &kpr_routes; e = e->next) { @@ -722,11 +718,67 @@ kpr_get_route (int idx, int *gateway_nalid, ptl_nid_t *gateway_nid, return (-ENOENT); } +static int +kpr_nal_cmd(struct portals_cfg *pcfg, void * private) +{ + int err = -EINVAL; + ENTRY; + + switch(pcfg->pcfg_command) { + default: + CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command); + break; + + case NAL_CMD_ADD_ROUTE: + CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n", + pcfg->pcfg_nal, pcfg->pcfg_nid, + 
pcfg->pcfg_nid2, pcfg->pcfg_nid3); + err = kpr_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid, + pcfg->pcfg_nid2, pcfg->pcfg_nid3); + break; + + case NAL_CMD_DEL_ROUTE: + CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n", + pcfg->pcfg_gw_nal, pcfg->pcfg_nid, + pcfg->pcfg_nid2, pcfg->pcfg_nid3); + err = kpr_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, + pcfg->pcfg_nid2, pcfg->pcfg_nid3); + break; + + case NAL_CMD_NOTIFY_ROUTER: { + CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n", + pcfg->pcfg_gw_nal, pcfg->pcfg_nid, + pcfg->pcfg_flags ? "Enabling" : "Disabling", + (time_t)pcfg->pcfg_nid3); + + err = kpr_sys_notify (pcfg->pcfg_gw_nal, pcfg->pcfg_nid, + pcfg->pcfg_flags, (time_t)pcfg->pcfg_nid3); + break; + } + + case NAL_CMD_GET_ROUTE: + CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count); + err = kpr_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal, + &pcfg->pcfg_nid, + &pcfg->pcfg_nid2, &pcfg->pcfg_nid3, + &pcfg->pcfg_flags); + break; + } + RETURN(err); +} + + static void /*__exit*/ kpr_finalise (void) { LASSERT (list_empty (&kpr_nals)); + libcfs_nal_cmd_unregister(ROUTER); + + PORTAL_SYMBOL_UNREGISTER(kpr_router_interface); + + kpr_proc_fini(); + while (!list_empty (&kpr_routes)) { kpr_route_entry_t *re = list_entry(kpr_routes.next, kpr_route_entry_t, @@ -736,11 +788,6 @@ kpr_finalise (void) PORTAL_FREE(re, sizeof (*re)); } - kpr_proc_fini(); - - PORTAL_SYMBOL_UNREGISTER(kpr_router_interface); - PORTAL_SYMBOL_UNREGISTER(kpr_control_interface); - CDEBUG(D_MALLOC, "kpr_finalise: kmem back to %d\n", atomic_read(&portal_kmemory)); } @@ -748,13 +795,20 @@ kpr_finalise (void) static int __init kpr_initialise (void) { + int rc; + CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n", atomic_read(&portal_kmemory)); kpr_proc_init(); + rc = libcfs_nal_cmd_register(ROUTER, kpr_nal_cmd, NULL); + if (rc != 0) { + CERROR("Can't register nal cmd handler\n"); + return (rc); + } + PORTAL_SYMBOL_REGISTER(kpr_router_interface); - 
PORTAL_SYMBOL_REGISTER(kpr_control_interface); return (0); } @@ -765,5 +819,4 @@ MODULE_LICENSE("GPL"); module_init (kpr_initialise); module_exit (kpr_finalise); -EXPORT_SYMBOL (kpr_control_interface); EXPORT_SYMBOL (kpr_router_interface); diff --git a/lnet/router/router.h b/lnet/router/router.h index 309025b3..0787064 100644 --- a/lnet/router/router.h +++ b/lnet/router/router.h @@ -93,15 +93,6 @@ extern void kpr_deregister_nal (void *arg); extern void kpr_proc_init (void); extern void kpr_proc_fini (void); -extern int kpr_add_route (int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); -extern int kpr_del_route (int gw_nal, ptl_nid_t gw_nid, - ptl_nid_t lo, ptl_nid_t hi); -extern int kpr_get_route (int idx, int *gateway_nal, ptl_nid_t *gateway_nid, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, int *alive); -extern int kpr_sys_notify (int gw_nalid, ptl_nid_t gw_nid, - int alive, time_t when); - extern unsigned long long kpr_fwd_bytes; extern unsigned long kpr_fwd_packets; extern unsigned long kpr_fwd_errors; diff --git a/lnet/tests/.cvsignore b/lnet/tests/.cvsignore index d0c4c88..e034130 100644 --- a/lnet/tests/.cvsignore +++ b/lnet/tests/.cvsignore @@ -1,4 +1,10 @@ Makefile -Makefile.in .deps -.*.o.cmd +.*.cmd +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.tmp_versions +.depend diff --git a/lnet/tests/Makefile.am b/lnet/tests/Makefile.am deleted file mode 100644 index 7b47ae0..0000000 --- a/lnet/tests/Makefile.am +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -include ../Rules.linux - -LDFLAGS = -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r -LINK = $(LD) $(LDFLAGS) -o $@ -DEFS = -LIBS = -MODULE = $(basename) -EXTRA_DIST = startserver.sh startclient.sh stopserver.sh stopclient.sh - -noinst_PROGRAMS = pingsrv.o pingcli.o spingsrv.o spingcli.o - -pingsrv_o_SOURCES = ping_srv.c ping.h - -pingcli_o_SOURCES = ping_cli.c ping.h - -spingsrv_o_SOURCES = sping_srv.c ping.h - -spingcli_o_SOURCES = sping_cli.c ping.h diff --git a/lnet/tests/Makefile.in b/lnet/tests/Makefile.in new file mode 100644 index 0000000..c309db0 --- /dev/null +++ b/lnet/tests/Makefile.in @@ -0,0 +1,16 @@ +MODULES := pingsrv pingcli spingsrv spingcli +pingsrv-objs := ping_srv.o + +ifeq ($(PATCHLEVEL),6) +pingcli-objs := ping_cli.o +spingsrv-objs := sping_srv.o +spingcli-objs := sping_cli.o +else +ping%.c: ping_%.c + ln -sf $< $@ + +sping%.c: sping_%.c + ln -sf $< $@ +endif + +@INCLUDE_RULES@ diff --git a/lnet/tests/autoMakefile.am b/lnet/tests/autoMakefile.am new file mode 100644 index 0000000..5f81b93 --- /dev/null +++ b/lnet/tests/autoMakefile.am @@ -0,0 +1,16 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. 
+# See the file COPYING in this distribution + +if MODULES +if !CRAY_PORTALS +if TESTS +noinst_DATA := pingsrv$(KMODEXT) pingcli$(KMODEXT) +noinst_DATA += spingsrv$(KMODEXT) spingcli$(KMODEXT) +endif +endif +endif + +MOSTLYCLEANFILES = *.o *.ko *.mod.c pingsrv.c pingcli.c spingsrv.c spingcli.c +DIST_SOURCES = ping_srv.c ping_cli.c sping_srv.c sping_cli.c ping.h diff --git a/lnet/tests/ping_cli.c b/lnet/tests/ping_cli.c index 9977f20..b216df1 100644 --- a/lnet/tests/ping_cli.c +++ b/lnet/tests/ping_cli.c @@ -46,7 +46,7 @@ static struct pingcli_data *client = NULL; static int count = 0; static void -pingcli_shutdown(int err) +pingcli_shutdown(ptl_handle_ni_t nih, int err) { int rc; @@ -70,7 +70,7 @@ pingcli_shutdown(int err) if ((rc = PtlMEUnlink (client->me))) PDEBUG ("PtlMEUnlink", rc); case 3: - kportal_put_ni (client->args->ioc_nal); + PtlNIFini(nih); case 4: /* Free our buffers */ @@ -84,7 +84,7 @@ pingcli_shutdown(int err) CDEBUG (D_OTHER, "ping client released resources\n"); } /* pingcli_shutdown() */ -static int pingcli_callback(ptl_event_t *ev) +static void pingcli_callback(ptl_event_t *ev) { int i, magic; i = *(int *)(ev->mem_desc.start + ev->offset + sizeof(unsigned)); @@ -92,21 +92,19 @@ static int pingcli_callback(ptl_event_t *ev) if(magic != 0xcafebabe) { printk ("LustreError: Unexpected response \n"); - return 1; } if((i == count) || !count) wake_up_process (client->tsk); else printk ("LustreError: Received response after timeout for %d\n",i); - return 1; } static struct pingcli_data * pingcli_start(struct portal_ioctl_data *args) { - ptl_handle_ni_t *nip; + ptl_handle_ni_t nih = PTL_INVALID_HANDLE; unsigned ping_head_magic = PING_HEADER_MAGIC; unsigned ping_bulk_magic = PING_BULK_MAGIC; int rc; @@ -127,7 +125,7 @@ pingcli_start(struct portal_ioctl_data *args) if (client->outbuf == NULL) { CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } @@ -136,23 +134,24 @@ 
pingcli_start(struct portal_ioctl_data *args) if (client->inbuf == NULL) { CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (args->ioc_nal)) == NULL) + rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { CERROR ("NAL %d not loaded\n", args->ioc_nal); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } /* Based on the initialization aquire our unique portal ID. */ - if ((rc = PtlGetId (*nip, &client->myid))) + if ((rc = PtlGetId (nih, &client->myid))) { CERROR ("PtlGetId error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } @@ -164,20 +163,20 @@ pingcli_start(struct portal_ioctl_data *args) client->id_remote.nid = args->ioc_nid; client->id_remote.pid = 0; - if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT, + if ((rc = PtlMEAttach (nih, PTL_PING_CLIENT, client->id_local, 0, ~0, PTL_RETAIN, PTL_INS_AFTER, &client->me))) { CERROR ("PtlMEAttach error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } /* Allocate the event queue for this network interface */ - if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq))) + if ((rc = PtlEQAlloc (nih, 64, pingcli_callback, &client->eq))) { CERROR ("PtlEQAlloc error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } @@ -196,7 +195,7 @@ pingcli_start(struct portal_ioctl_data *args) if ((rc = PtlMDAttach (client->me, client->md_in_head, PTL_UNLINK, &client->md_in_head_h))) { CERROR ("PtlMDAttach error %d\n", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return (NULL); } /* Setup the outgoing ping header */ @@ -212,10 +211,10 @@ pingcli_start(struct portal_ioctl_data *args) count = 0; /* Bind the outgoing ping header */ - if ((rc=PtlMDBind (*nip, client->md_out_head, + if ((rc=PtlMDBind (nih, 
client->md_out_head, PTL_UNLINK, &client->md_out_head_h))) { CERROR ("PtlMDBind error %d\n", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return NULL; } while ((args->ioc_count - count)) { @@ -230,7 +229,7 @@ pingcli_start(struct portal_ioctl_data *args) if((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ, client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) { PDEBUG ("PtlPut (header)", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return NULL; } printk ("Lustre: sent msg no %d", count); @@ -255,7 +254,7 @@ pingcli_start(struct portal_ioctl_data *args) PORTAL_FREE (client->inbuf, (args->ioc_size + STDSIZE) * args->ioc_count); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); /* Success! */ return NULL; diff --git a/lnet/tests/ping_srv.c b/lnet/tests/ping_srv.c index 0aa1ea7..188ba98 100644 --- a/lnet/tests/ping_srv.c +++ b/lnet/tests/ping_srv.c @@ -81,7 +81,7 @@ static void *pingsrv_shutdown(int err) PDEBUG ("PtlMEUnlink", rc); case 3: - kportal_put_ni (nal); + PtlNIFini (server->ni); case 4: @@ -167,19 +167,18 @@ int pingsrv_thread(void *arg) return 0; } -static int pingsrv_packet(ptl_event_t *ev) +static void pingsrv_packet(ptl_event_t *ev) { atomic_inc (&pkt); wake_up_process (server->tsk); - return 1; } /* pingsrv_head() */ -static int pingsrv_callback(ptl_event_t *ev) +static void pingsrv_callback(ptl_event_t *ev) { if (ev == NULL) { CERROR ("null in callback, ev=%p\n", ev); - return 0; + return; } server->evnt = *ev; @@ -193,23 +192,24 @@ static int pingsrv_callback(ptl_event_t *ev) packets_valid++; - return pingsrv_packet(ev); + pingsrv_packet(ev); } /* pingsrv_callback() */ static struct pingsrv_data *pingsrv_setup(void) { - ptl_handle_ni_t *nip; int rc; + server->ni = PTL_INVALID_HANDLE; + /* Aquire and initialize the proper nal for portals. 
*/ - if ((nip = kportal_get_ni (nal)) == NULL) { + rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni); + if (!(rc == PTL_OK || rc == PTL_IFACE_DUP)) { CDEBUG (D_OTHER, "NAL %d not loaded\n", nal); return pingsrv_shutdown (4); } - server->ni= *nip; /* Based on the initialization aquire our unique portal ID. */ if ((rc = PtlGetId (server->ni, &server->my_id))) { @@ -229,7 +229,7 @@ static struct pingsrv_data *pingsrv_setup(void) } - if ((rc = PtlEQAlloc (server->ni, 1024, pingsrv_callback, + if ((rc = PtlEQAlloc (server->ni, 1024, &pingsrv_callback, &server->eq))) { PDEBUG ("PtlEQAlloc (callback)", rc); return pingsrv_shutdown (2); @@ -298,7 +298,7 @@ static void /*__exit*/ pingsrv_cleanup(void) MODULE_PARM(nal, "i"); MODULE_PARM_DESC(nal, "Use the specified NAL " - "(6-kscimacnal, 2-ksocknal, 1-kqswnal)"); + "(2-ksocknal, 1-kqswnal)"); MODULE_AUTHOR("Brian Behlendorf (LLNL)"); MODULE_DESCRIPTION("A kernel space ping server for portals testing"); diff --git a/lnet/tests/sping_cli.c b/lnet/tests/sping_cli.c index 663da4e..8e86491 100644 --- a/lnet/tests/sping_cli.c +++ b/lnet/tests/sping_cli.c @@ -51,7 +51,7 @@ static struct pingcli_data *client = NULL; static int count = 0; static void -pingcli_shutdown(int err) +pingcli_shutdown(ptl_handle_ni_t nih, int err) { int rc; @@ -72,7 +72,7 @@ pingcli_shutdown(int err) if ((rc = PtlMEUnlink (client->me))) PDEBUG ("PtlMEUnlink", rc); case 3: - kportal_put_ni (client->args->ioc_nal); + PtlNIFini (nih); case 4: /* Free our buffers */ @@ -92,17 +92,16 @@ pingcli_shutdown(int err) CDEBUG (D_OTHER, "ping client released resources\n"); } /* pingcli_shutdown() */ -static int pingcli_callback(ptl_event_t *ev) +static void pingcli_callback(ptl_event_t *ev) { - wake_up_process (client->tsk); - return 1; + wake_up_process (client->tsk); } static struct pingcli_data * pingcli_start(struct portal_ioctl_data *args) { - const ptl_handle_ni_t *nip; + ptl_handle_ni_t nih = PTL_INVALID_HANDLE; unsigned ping_head_magic = PING_HEADER_MAGIC; char 
str[PTL_NALFMT_SIZE]; int rc; @@ -122,7 +121,7 @@ pingcli_start(struct portal_ioctl_data *args) if (client->outbuf == NULL) { CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } @@ -131,23 +130,24 @@ pingcli_start(struct portal_ioctl_data *args) if (client->inbuf == NULL) { CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (args->ioc_nal)) == NULL) + rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { CERROR ("NAL %d not loaded.\n", args->ioc_nal); - pingcli_shutdown (4); + pingcli_shutdown (nih, 4); return (NULL); } /* Based on the initialization aquire our unique portal ID. */ - if ((rc = PtlGetId (*nip, &client->myid))) + if ((rc = PtlGetId (nih, &client->myid))) { CERROR ("PtlGetId error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } @@ -159,20 +159,20 @@ pingcli_start(struct portal_ioctl_data *args) client->id_remote.nid = args->ioc_nid; client->id_remote.pid = 0; - if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT, + if ((rc = PtlMEAttach (nih, PTL_PING_CLIENT, client->id_local, 0, ~0, PTL_RETAIN, PTL_INS_AFTER, &client->me))) { CERROR ("PtlMEAttach error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } /* Allocate the event queue for this network interface */ - if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq))) + if ((rc = PtlEQAlloc (nih, 64, pingcli_callback, &client->eq))) { CERROR ("PtlEQAlloc error %d\n", rc); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return (NULL); } @@ -189,7 +189,7 @@ pingcli_start(struct portal_ioctl_data *args) if ((rc = PtlMDAttach (client->me, client->md_in_head, PTL_UNLINK, &client->md_in_head_h))) { CERROR ("PtlMDAttach error %d\n", rc); - 
pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return (NULL); } @@ -204,17 +204,17 @@ pingcli_start(struct portal_ioctl_data *args) memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic)); /* Bind the outgoing ping header */ - if ((rc=PtlMDBind (*nip, client->md_out_head, + if ((rc=PtlMDBind (nih, client->md_out_head, PTL_UNLINK, &client->md_out_head_h))) { CERROR ("PtlMDBind error %d\n", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return (NULL); } /* Put the ping packet */ if((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ, client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) { PDEBUG ("PtlPut (header)", rc); - pingcli_shutdown (1); + pingcli_shutdown (nih, 1); return NULL; } @@ -223,13 +223,13 @@ pingcli_start(struct portal_ioctl_data *args) rc = schedule_timeout (20 * args->ioc_timeout); if (rc == 0) { printk ("LustreError: Time out on the server\n"); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); return NULL; } else printk("Lustre: Received respose from the server \n"); - pingcli_shutdown (2); + pingcli_shutdown (nih, 2); /* Success! 
*/ return NULL; diff --git a/lnet/tests/sping_srv.c b/lnet/tests/sping_srv.c index e8fb470..d840ddd 100644 --- a/lnet/tests/sping_srv.c +++ b/lnet/tests/sping_srv.c @@ -53,7 +53,7 @@ #define STDSIZE (sizeof(int) + sizeof(int) + 4) -static int nal = 0; // Your NAL, +static int nal = PTL_IFACE_DEFAULT; // Your NAL, static unsigned long packets_valid = 0; // Valid packets static int running = 1; atomic_t pkt; @@ -86,7 +86,7 @@ static void *pingsrv_shutdown(int err) PDEBUG ("PtlMEUnlink", rc); case 3: - kportal_put_ni (nal); + PtlNIFini(server->ni); case 4: @@ -159,19 +159,18 @@ int pingsrv_thread(void *arg) return 0; } -static int pingsrv_packet(ptl_event_t *ev) +static void pingsrv_packet(ptl_event_t *ev) { atomic_inc (&pkt); wake_up_process (server->tsk); - return 1; } /* pingsrv_head() */ -static int pingsrv_callback(ptl_event_t *ev) +static void pingsrv_callback(ptl_event_t *ev) { if (ev == NULL) { CERROR ("null in callback, ev=%p\n", ev); - return 0; + return; } server->evnt = *ev; @@ -182,24 +181,24 @@ static int pingsrv_callback(ptl_event_t *ev) packets_valid++; - return pingsrv_packet(ev); + pingsrv_packet(ev); } /* pingsrv_callback() */ static struct pingsrv_data *pingsrv_setup(void) { - ptl_handle_ni_t *nip; int rc; /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (nal)) == NULL) { + server->ni = PTL_INVALID_HANDLE; + + rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { CDEBUG (D_OTHER, "Nal %d not loaded.\n", nal); return pingsrv_shutdown (4); } - server->ni= *nip; - /* Based on the initialization aquire our unique portal ID. 
*/ if ((rc = PtlGetId (server->ni, &server->my_id))) { PDEBUG ("PtlGetId", rc); @@ -285,7 +284,7 @@ static void /*__exit*/ pingsrv_cleanup(void) MODULE_PARM(nal, "i"); MODULE_PARM_DESC(nal, "Use the specified NAL " - "(6-kscimacnal, 2-ksocknal, 1-kqswnal)"); + "(2-ksocknal, 1-kqswnal)"); MODULE_AUTHOR("Brian Behlendorf (LLNL)"); MODULE_DESCRIPTION("A kernel space ping server for portals testing"); diff --git a/lnet/ulnds/Makefile.am b/lnet/ulnds/Makefile.am index 6035ca1..1681250 100644 --- a/lnet/ulnds/Makefile.am +++ b/lnet/ulnds/Makefile.am @@ -1,9 +1,10 @@ -CPPFLAGS= -INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include -I$(srcdir) +if LIBLUSTRE noinst_LIBRARIES = libtcpnal.a +endif + +if !CRAY_PORTALS pkginclude_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h - -if LIBLUSTRE -libtcpnal_a_CFLAGS = -fPIC -endif +libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS) +libtcpnal_a_CFLAGS = $(LLCFLAGS) +endif \ No newline at end of file diff --git a/lnet/ulnds/bridge.h b/lnet/ulnds/bridge.h index 9a90ab8..90ce324 100644 --- a/lnet/ulnds/bridge.h +++ b/lnet/ulnds/bridge.h @@ -10,6 +10,12 @@ #define TCPNAL_PROCBRIDGE_H #include +#include + +#define PTL_IFACE_TCP 1 +#define PTL_IFACE_ER 2 +#define PTL_IFACE_SS 3 +#define PTL_IFACE_MAX 4 typedef struct bridge { int alive; @@ -22,12 +28,6 @@ typedef struct bridge { } *bridge; -nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc); - typedef int (*nal_initialize)(bridge); extern nal_initialize nal_table[PTL_IFACE_MAX]; diff --git a/lnet/ulnds/connection.c b/lnet/ulnds/connection.c index ca6999a..3448460 100644 --- a/lnet/ulnds/connection.c +++ b/lnet/ulnds/connection.c @@ -229,7 +229,7 @@ tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 
incarnation) hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); hdr.msg.hello.type = __cpu_to_le32 (type); - hdr.msg.hello.incarnation = 0; + hdr.msg.hello.incarnation = __cpu_to_le64(incarnation); /* Assume sufficient socket buffering for this message */ rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr)); @@ -315,6 +315,8 @@ connection force_tcp_connection(manager m, connection conn; struct sockaddr_in addr; unsigned int id[2]; + struct timeval tv; + __u64 incarnation; port = tcpnal_acceptor_port; @@ -353,8 +355,11 @@ connection force_tcp_connection(manager m, setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option)); #endif + gettimeofday(&tv, NULL); + incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; + /* say hello */ - if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, 0)) + if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation)) exit(-1); conn = allocate_connection(m, ip, port, fd); diff --git a/lnet/ulnds/procapi.c b/lnet/ulnds/procapi.c index 00a7ae4..e40c4b9 100644 --- a/lnet/ulnds/procapi.c +++ b/lnet/ulnds/procapi.c @@ -95,7 +95,7 @@ static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len, * cleanup nal state, reclaim the lower side thread and * its state using PTL_FINI codepoint */ -static int procbridge_shutdown(nal_t *n, int ni) +static void procbridge_shutdown(nal_t *n) { bridge b=(bridge)n->nal_data; procbridge p=(procbridge)b->local; @@ -114,16 +114,6 @@ static int procbridge_shutdown(nal_t *n, int ni) } while (1); free(p); - return(0); -} - - -/* Function: validate - * useless stub - */ -static int procbridge_validate(nal_t *nal, void *base, size_t extent) -{ - return(0); } @@ -187,18 +177,20 @@ static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds) return (milliseconds); } +/* forward decl */ +extern int procbridge_startup (nal_t *, ptl_pid_t, + ptl_ni_limits_t *, ptl_ni_limits_t *); /* api_nal * the interface vector to allow the generic code to access * this nal. 
this is seperate from the library side nal_cb. * TODO: should be dyanmically allocated */ -static nal_t api_nal = { - ni: {0}, +nal_t procapi_nal = { nal_data: NULL, - forward: procbridge_forward, + startup: procbridge_startup, shutdown: procbridge_shutdown, - validate: procbridge_validate, + forward: procbridge_forward, yield: procbridge_yield, lock: procbridge_lock, unlock: procbridge_unlock @@ -206,7 +198,7 @@ static nal_t api_nal = { ptl_nid_t tcpnal_mynid; -/* Function: procbridge_interface +/* Function: procbridge_startup * * Arguments: pid: requested process id (port offset) * PTL_ID_ANY not supported. @@ -214,40 +206,34 @@ ptl_nid_t tcpnal_mynid; * and effectively ignored * actual: limits actually allocated and returned * - * Returns: a pointer to my statically allocated top side NAL - * structure + * Returns: portals rc * * initializes the tcp nal. we define unix_failure as an * error wrapper to cut down clutter. */ -nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid) +int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { nal_init_args_t args; + procbridge p; bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; + /* XXX nal_type is purely private to tcpnal here */ int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ - if(initialized) return (&api_nal); + LASSERT(nal == &procapi_nal); init_unix_timer(); b=(bridge)malloc(sizeof(struct bridge)); p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; + nal->nal_data=b; b->local=p; - if (ptl_size) - limits.max_ptable_index = ptl_size; - if (acl_size) - limits.max_atable_index = acl_size; - args.nia_requested_pid = requested_pid; - args.nia_limits = &limits; + args.nia_requested_limits = requested_limits; + args.nia_actual_limits = actual_limits; args.nia_nal_type = nal_type; args.nia_bridge = b; @@ 
-259,19 +245,19 @@ nal_t *procbridge_interface(int num_interface, /* initialize notifier */ if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) { perror("socketpair failed"); - return NULL; + return PTL_FAIL; } if (!register_io_handler(p->notifier[1], READ_HANDLER, procbridge_notifier_handler, p)) { perror("fail to register notifier handler"); - return NULL; + return PTL_FAIL; } /* create nal thread */ if (pthread_create(&p->t, NULL, nal_thread, &args)) { perror("nal_init: pthread_create"); - return(NULL); + return PTL_FAIL; } do { @@ -285,10 +271,9 @@ nal_t *procbridge_interface(int num_interface, } while (1); if (p->nal_flags & NAL_FLAG_STOPPED) - return (NULL); + return PTL_FAIL; b->nal_cb->ni.nid = tcpnal_mynid; - initialized = 1; - return (&api_nal); + return PTL_OK; } diff --git a/lnet/ulnds/procbridge.h b/lnet/ulnds/procbridge.h index 965f83d..1c8e7dd 100644 --- a/lnet/ulnds/procbridge.h +++ b/lnet/ulnds/procbridge.h @@ -35,7 +35,8 @@ typedef struct procbridge { typedef struct nal_init_args { ptl_pid_t nia_requested_pid; - ptl_ni_limits_t *nia_limits; + ptl_ni_limits_t *nia_requested_limits; + ptl_ni_limits_t *nia_actual_limits; int nia_nal_type; bridge nia_bridge; } nal_init_args_t; @@ -50,10 +51,6 @@ extern void *nal_thread(void *); #define MAX_PTLS 128 extern void set_address(bridge t,ptl_pid_t pidrequest); -extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); extern void procbridge_wakeup_nal(procbridge p); #endif diff --git a/lnet/ulnds/proclib.c b/lnet/ulnds/proclib.c index 1cfb233..af0745b 100644 --- a/lnet/ulnds/proclib.c +++ b/lnet/ulnds/proclib.c @@ -157,9 +157,6 @@ static void check_stopping(void *z) * We define a limit macro to place a ceiling on limits * for syntactic convenience */ -#define LIMIT(x,y,max)\ - if ((unsigned int)x > max) y = max; - extern int tcpnal_init(bridge); nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; @@ -170,10 +167,8 @@ void 
*nal_thread(void *z) bridge b = args->nia_bridge; procbridge p=b->local; int rc; - ptl_pid_t pid_request; + ptl_process_id_t process_id; int nal_type; - ptl_ni_limits_t desired; - ptl_ni_limits_t actual; b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t)); b->nal_cb->nal_data=b; @@ -189,28 +184,21 @@ void *nal_thread(void *z) b->nal_cb->cb_callback=nal_callback; b->nal_cb->cb_dist=nal_dist; - pid_request = args->nia_requested_pid; - desired = *args->nia_limits; nal_type = args->nia_nal_type; - actual = desired; - LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES); - LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS); - LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS); - LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS); - LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS); - - set_address(b,pid_request); + /* Wierd, but this sets b->nal_cb->ni.{nid,pid}, which lib_init() is + * about to do from the process_id passed to it...*/ + set_address(b,args->nia_requested_pid); + process_id.pid = b->nal_cb->ni.pid; + process_id.nid = b->nal_cb->ni.nid; + if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b); /* initialize the generic 'library' level code */ - rc = lib_init(b->nal_cb, - b->nal_cb->ni.nid, - b->nal_cb->ni.pid, - 10, - actual.max_ptable_index, - actual.max_atable_index); + rc = lib_init(b->nal_cb, process_id, + args->nia_requested_limits, + args->nia_actual_limits); /* * Whatever the initialization returned is passed back to the @@ -219,11 +207,11 @@ void *nal_thread(void *z) */ /* this should perform error checking */ pthread_mutex_lock(&p->mutex); - p->nal_flags |= rc ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; + p->nal_flags |= (rc != PTL_OK) ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; pthread_cond_broadcast(&p->cond); pthread_mutex_unlock(&p->mutex); - if (!rc) { + if (rc == PTL_OK) { /* the thunk function is called each time the timer loop performs an operation and returns to blocking mode. 
we overload this function to inform the api side that @@ -233,4 +221,3 @@ void *nal_thread(void *z) } return(0); } -#undef LIMIT diff --git a/lnet/ulnds/socklnd/Makefile.am b/lnet/ulnds/socklnd/Makefile.am index 6035ca1..1681250 100644 --- a/lnet/ulnds/socklnd/Makefile.am +++ b/lnet/ulnds/socklnd/Makefile.am @@ -1,9 +1,10 @@ -CPPFLAGS= -INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include -I$(srcdir) +if LIBLUSTRE noinst_LIBRARIES = libtcpnal.a +endif + +if !CRAY_PORTALS pkginclude_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h - -if LIBLUSTRE -libtcpnal_a_CFLAGS = -fPIC -endif +libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS) +libtcpnal_a_CFLAGS = $(LLCFLAGS) +endif \ No newline at end of file diff --git a/lnet/ulnds/socklnd/bridge.h b/lnet/ulnds/socklnd/bridge.h index 9a90ab8..90ce324 100644 --- a/lnet/ulnds/socklnd/bridge.h +++ b/lnet/ulnds/socklnd/bridge.h @@ -10,6 +10,12 @@ #define TCPNAL_PROCBRIDGE_H #include +#include + +#define PTL_IFACE_TCP 1 +#define PTL_IFACE_ER 2 +#define PTL_IFACE_SS 3 +#define PTL_IFACE_MAX 4 typedef struct bridge { int alive; @@ -22,12 +28,6 @@ typedef struct bridge { } *bridge; -nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc); - typedef int (*nal_initialize)(bridge); extern nal_initialize nal_table[PTL_IFACE_MAX]; diff --git a/lnet/ulnds/socklnd/connection.c b/lnet/ulnds/socklnd/connection.c index ca6999a..3448460 100644 --- a/lnet/ulnds/socklnd/connection.c +++ b/lnet/ulnds/socklnd/connection.c @@ -229,7 +229,7 @@ tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation) hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); hdr.msg.hello.type = __cpu_to_le32 (type); - hdr.msg.hello.incarnation = 0; + hdr.msg.hello.incarnation = 
__cpu_to_le64(incarnation); /* Assume sufficient socket buffering for this message */ rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr)); @@ -315,6 +315,8 @@ connection force_tcp_connection(manager m, connection conn; struct sockaddr_in addr; unsigned int id[2]; + struct timeval tv; + __u64 incarnation; port = tcpnal_acceptor_port; @@ -353,8 +355,11 @@ connection force_tcp_connection(manager m, setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option)); #endif + gettimeofday(&tv, NULL); + incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; + /* say hello */ - if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, 0)) + if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation)) exit(-1); conn = allocate_connection(m, ip, port, fd); diff --git a/lnet/ulnds/socklnd/procapi.c b/lnet/ulnds/socklnd/procapi.c index 00a7ae4..e40c4b9 100644 --- a/lnet/ulnds/socklnd/procapi.c +++ b/lnet/ulnds/socklnd/procapi.c @@ -95,7 +95,7 @@ static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len, * cleanup nal state, reclaim the lower side thread and * its state using PTL_FINI codepoint */ -static int procbridge_shutdown(nal_t *n, int ni) +static void procbridge_shutdown(nal_t *n) { bridge b=(bridge)n->nal_data; procbridge p=(procbridge)b->local; @@ -114,16 +114,6 @@ static int procbridge_shutdown(nal_t *n, int ni) } while (1); free(p); - return(0); -} - - -/* Function: validate - * useless stub - */ -static int procbridge_validate(nal_t *nal, void *base, size_t extent) -{ - return(0); } @@ -187,18 +177,20 @@ static int procbridge_yield(nal_t *n, unsigned long *flags, int milliseconds) return (milliseconds); } +/* forward decl */ +extern int procbridge_startup (nal_t *, ptl_pid_t, + ptl_ni_limits_t *, ptl_ni_limits_t *); /* api_nal * the interface vector to allow the generic code to access * this nal. this is seperate from the library side nal_cb. 
* TODO: should be dyanmically allocated */ -static nal_t api_nal = { - ni: {0}, +nal_t procapi_nal = { nal_data: NULL, - forward: procbridge_forward, + startup: procbridge_startup, shutdown: procbridge_shutdown, - validate: procbridge_validate, + forward: procbridge_forward, yield: procbridge_yield, lock: procbridge_lock, unlock: procbridge_unlock @@ -206,7 +198,7 @@ static nal_t api_nal = { ptl_nid_t tcpnal_mynid; -/* Function: procbridge_interface +/* Function: procbridge_startup * * Arguments: pid: requested process id (port offset) * PTL_ID_ANY not supported. @@ -214,40 +206,34 @@ ptl_nid_t tcpnal_mynid; * and effectively ignored * actual: limits actually allocated and returned * - * Returns: a pointer to my statically allocated top side NAL - * structure + * Returns: portals rc * * initializes the tcp nal. we define unix_failure as an * error wrapper to cut down clutter. */ -nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid) +int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { nal_init_args_t args; + procbridge p; bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; + /* XXX nal_type is purely private to tcpnal here */ int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ - if(initialized) return (&api_nal); + LASSERT(nal == &procapi_nal); init_unix_timer(); b=(bridge)malloc(sizeof(struct bridge)); p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; + nal->nal_data=b; b->local=p; - if (ptl_size) - limits.max_ptable_index = ptl_size; - if (acl_size) - limits.max_atable_index = acl_size; - args.nia_requested_pid = requested_pid; - args.nia_limits = &limits; + args.nia_requested_limits = requested_limits; + args.nia_actual_limits = actual_limits; args.nia_nal_type = nal_type; args.nia_bridge = b; @@ -259,19 +245,19 @@ nal_t *procbridge_interface(int 
num_interface, /* initialize notifier */ if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) { perror("socketpair failed"); - return NULL; + return PTL_FAIL; } if (!register_io_handler(p->notifier[1], READ_HANDLER, procbridge_notifier_handler, p)) { perror("fail to register notifier handler"); - return NULL; + return PTL_FAIL; } /* create nal thread */ if (pthread_create(&p->t, NULL, nal_thread, &args)) { perror("nal_init: pthread_create"); - return(NULL); + return PTL_FAIL; } do { @@ -285,10 +271,9 @@ nal_t *procbridge_interface(int num_interface, } while (1); if (p->nal_flags & NAL_FLAG_STOPPED) - return (NULL); + return PTL_FAIL; b->nal_cb->ni.nid = tcpnal_mynid; - initialized = 1; - return (&api_nal); + return PTL_OK; } diff --git a/lnet/ulnds/socklnd/procbridge.h b/lnet/ulnds/socklnd/procbridge.h index 965f83d..1c8e7dd 100644 --- a/lnet/ulnds/socklnd/procbridge.h +++ b/lnet/ulnds/socklnd/procbridge.h @@ -35,7 +35,8 @@ typedef struct procbridge { typedef struct nal_init_args { ptl_pid_t nia_requested_pid; - ptl_ni_limits_t *nia_limits; + ptl_ni_limits_t *nia_requested_limits; + ptl_ni_limits_t *nia_actual_limits; int nia_nal_type; bridge nia_bridge; } nal_init_args_t; @@ -50,10 +51,6 @@ extern void *nal_thread(void *); #define MAX_PTLS 128 extern void set_address(bridge t,ptl_pid_t pidrequest); -extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); extern void procbridge_wakeup_nal(procbridge p); #endif diff --git a/lnet/ulnds/socklnd/proclib.c b/lnet/ulnds/socklnd/proclib.c index 1cfb233..af0745b 100644 --- a/lnet/ulnds/socklnd/proclib.c +++ b/lnet/ulnds/socklnd/proclib.c @@ -157,9 +157,6 @@ static void check_stopping(void *z) * We define a limit macro to place a ceiling on limits * for syntactic convenience */ -#define LIMIT(x,y,max)\ - if ((unsigned int)x > max) y = max; - extern int tcpnal_init(bridge); nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; @@ -170,10 
+167,8 @@ void *nal_thread(void *z) bridge b = args->nia_bridge; procbridge p=b->local; int rc; - ptl_pid_t pid_request; + ptl_process_id_t process_id; int nal_type; - ptl_ni_limits_t desired; - ptl_ni_limits_t actual; b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t)); b->nal_cb->nal_data=b; @@ -189,28 +184,21 @@ void *nal_thread(void *z) b->nal_cb->cb_callback=nal_callback; b->nal_cb->cb_dist=nal_dist; - pid_request = args->nia_requested_pid; - desired = *args->nia_limits; nal_type = args->nia_nal_type; - actual = desired; - LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES); - LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS); - LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS); - LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS); - LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS); - - set_address(b,pid_request); + /* Wierd, but this sets b->nal_cb->ni.{nid,pid}, which lib_init() is + * about to do from the process_id passed to it...*/ + set_address(b,args->nia_requested_pid); + process_id.pid = b->nal_cb->ni.pid; + process_id.nid = b->nal_cb->ni.nid; + if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b); /* initialize the generic 'library' level code */ - rc = lib_init(b->nal_cb, - b->nal_cb->ni.nid, - b->nal_cb->ni.pid, - 10, - actual.max_ptable_index, - actual.max_atable_index); + rc = lib_init(b->nal_cb, process_id, + args->nia_requested_limits, + args->nia_actual_limits); /* * Whatever the initialization returned is passed back to the @@ -219,11 +207,11 @@ void *nal_thread(void *z) */ /* this should perform error checking */ pthread_mutex_lock(&p->mutex); - p->nal_flags |= rc ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; + p->nal_flags |= (rc != PTL_OK) ? 
NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; pthread_cond_broadcast(&p->cond); pthread_mutex_unlock(&p->mutex); - if (!rc) { + if (rc == PTL_OK) { /* the thunk function is called each time the timer loop performs an operation and returns to blocking mode. we overload this function to inform the api side that @@ -233,4 +221,3 @@ void *nal_thread(void *z) } return(0); } -#undef LIMIT diff --git a/lnet/utils/Makefile.am b/lnet/utils/Makefile.am index e871d9a..a14df1c 100644 --- a/lnet/utils/Makefile.am +++ b/lnet/utils/Makefile.am @@ -3,26 +3,29 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution -# ../ for , ../../ for -COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -I$(srcdir)/../../include -LINK = $(CC) -o $@ +## $(srcdir)/../ for , ../../ for generated +#COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -I../../include +#LINK = $(CC) -o $@ if LIBLUSTRE - noinst_LIBRARIES = libuptlctl.a -libuptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h -libuptlctl_a_CFLAGS = -fPIC +libuptlctl_a_SOURCES = portals.c debug.c l_ioctl.c +libuptlctl_a_CPPFLAGS = $(LLCPPFLAGS) +libuptlctl_a_CFLAGS = $(LLCFLAGS) +endif -else +sbin_PROGRAMS = debugctl -sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck gmnalnid lib_LIBRARIES = libptlctl.a -acceptor_SOURCES = acceptor.c # -lefence +libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h + +if !CRAY_PORTALS +sbin_PROGRAMS += acceptor ptlctl routerstat wirecheck gmnalnid -wirecheck_SOURCES = wirecheck.c +acceptor_SOURCES = acceptor.c -libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h +wirecheck_SOURCES = wirecheck.c gmnalnid_SOURCES = gmnalnid.c @@ -30,9 +33,10 @@ ptlctl_SOURCES = ptlctl.c ptlctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE) ptlctl_DEPENDENCIES = libptlctl.a +routerstat_SOURCES = routerstat.c +endif + debugctl_SOURCES = debugctl.c debugctl_LDADD = -L. 
-lptlctl $(LIBREADLINE) $(LIBEFENCE) debugctl_DEPENDENCIES = libptlctl.a -routerstat_SOURCES = routerstat.c -endif diff --git a/lnet/utils/acceptor.c b/lnet/utils/acceptor.c index 29b8d1e..f6367d4 100644 --- a/lnet/utils/acceptor.c +++ b/lnet/utils/acceptor.c @@ -11,9 +11,7 @@ #include #include #include -#include #include - #include #include diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c index 00a6102..66c3807 100644 --- a/lnet/utils/debug.c +++ b/lnet/utils/debug.c @@ -65,9 +65,9 @@ static int debug_mask = ~0; static const char *portal_debug_subsystems[] = {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite", - "rpc", "mgmt", "portals", "socknal", "qswnal", "pinger", "filter", - "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", "ibnal", - "lmv", NULL}; + "rpc", "mgmt", "portals", "libcfs", "socknal", "qswnal", "pinger", + "filter", "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", + "ibnal", "lmv", "smfs", "cmobd", NULL}; static const char *portal_debug_masks[] = {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl", "blocks", "net", "warning", "buffs", "other", "dentry", "portals", @@ -183,9 +183,6 @@ static int applymask(char* procpath, int value) return 0; } -extern char *dump_filename; -extern int dump(int dev_id, int opc, void *buf); - static void applymask_all(unsigned int subs_mask, unsigned int debug_mask) { if (!dump_filename) { @@ -535,27 +532,27 @@ static struct mod_paths { {"obdclass", "lustre/obdclass"}, {"llog_test", "lustre/obdclass"}, {"ptlrpc", "lustre/ptlrpc"}, - {"obdext2", "lustre/obdext2"}, {"ost", "lustre/ost"}, {"osc", "lustre/osc"}, {"mds", "lustre/mds"}, {"mdc", "lustre/mdc"}, {"llite", "lustre/llite"}, + {"smfs", "lustre/smfs"}, {"obdecho", "lustre/obdecho"}, {"ldlm", "lustre/ldlm"}, {"obdfilter", "lustre/obdfilter"}, {"extN", "lustre/extN"}, {"lov", "lustre/lov"}, + {"lmv", "lustre/lmv"}, {"fsfilt_ext3", "lustre/lvfs"}, {"fsfilt_extN", "lustre/lvfs"}, {"fsfilt_reiserfs", 
"lustre/lvfs"}, - {"mds_ext2", "lustre/mds"}, - {"mds_ext3", "lustre/mds"}, - {"mds_extN", "lustre/mds"}, + {"fsfilt_smfs", "lustre/lvfs"}, {"ptlbd", "lustre/ptlbd"}, {"mgmt_svc", "lustre/mgmt"}, {"mgmt_cli", "lustre/mgmt"}, - {"lmv", "lustre/lmv"}, + {"cobd", "lustre/cobd"}, + {"cmobd", "lustre/cmobd"}, {NULL, NULL} }; diff --git a/lnet/utils/gmlndnid.c b/lnet/utils/gmlndnid.c index ff6631c..e45fae4 100644 --- a/lnet/utils/gmlndnid.c +++ b/lnet/utils/gmlndnid.c @@ -29,9 +29,7 @@ #include #include #include -#include #include - #include #include diff --git a/lnet/utils/l_ioctl.c b/lnet/utils/l_ioctl.c index 1adcc8e..0671c24 100644 --- a/lnet/utils/l_ioctl.c +++ b/lnet/utils/l_ioctl.c @@ -56,7 +56,7 @@ static struct ioc_dev ioc_dev_list[10]; struct dump_hdr { int magic; int dev_id; - int opc; + unsigned int opc; }; char *dump_filename; @@ -101,7 +101,7 @@ open_ioc_dev(int dev_id) static int -do_ioctl(int dev_id, int opc, void *buf) +do_ioctl(int dev_id, unsigned int opc, void *buf) { int fd, rc; @@ -131,7 +131,7 @@ get_dump_file() * used, but for now it will assumed whatever app reads the file will * know what to do. */ int -dump(int dev_id, int opc, void *buf) +dump(int dev_id, unsigned int opc, void *buf) { FILE *fp; struct dump_hdr dump_hdr; @@ -212,7 +212,7 @@ set_ioctl_dump(char * file) } int -l_ioctl(int dev_id, int opc, void *buf) +l_ioctl(int dev_id, unsigned int opc, void *buf) { return current_ioc_handler(dev_id, opc, buf); } @@ -226,7 +226,7 @@ l_ioctl(int dev_id, int opc, void *buf) * each device used in the dump. 
*/ int -parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *)) +parse_dump(char * dump_file, ioc_handler_t ioc_func) { int line =0; struct stat st; diff --git a/lnet/utils/parser.c b/lnet/utils/parser.c index db5292d..b91295b 100644 --- a/lnet/utils/parser.c +++ b/lnet/utils/parser.c @@ -28,12 +28,16 @@ #include #include -#include #ifdef HAVE_LIBREADLINE #define READLINE_LIBRARY #include + +/* completion_matches() is #if 0-ed out in modern glibc */ +#ifndef completion_matches +# define completion_matches rl_completion_matches +#endif #endif -//extern char **completion_matches __P((char *, rl_compentry_func_t *)); + extern void using_history(void); extern void stifle_history(int); extern void add_history(char *); @@ -221,14 +225,15 @@ static char **command_completion(char * text, int start, int end) char * pos; match_tbl = top_level; + for (table = find_cmd(rl_line_buffer, match_tbl, &pos); - table; - table = find_cmd(pos, match_tbl, &pos)) { + table; table = find_cmd(pos, match_tbl, &pos)) + { if (*(pos - 1) == ' ') match_tbl = table->pc_sub_cmd; } - return(completion_matches(text, command_generator)); + return completion_matches(text, command_generator); } #endif @@ -637,68 +642,6 @@ int Parser_arg2int(const char *inp, long *result, int base) return 1; } -/* Convert human readable size string to and int; "1k" -> 1000 */ -int Parser_size (int *sizep, char *str) { - int size; - char mod[32]; - - switch (sscanf (str, "%d%1[gGmMkK]", &size, mod)) { - default: - return (-1); - - case 1: - *sizep = size; - return (0); - - case 2: - switch (*mod) { - case 'g': - case 'G': - *sizep = size << 30; - return (0); - - case 'm': - case 'M': - *sizep = size << 20; - return (0); - - case 'k': - case 'K': - *sizep = size << 10; - return (0); - - default: - *sizep = size; - return (0); - } - } -} - -/* Convert a string boolean to an int; "enable" -> 1 */ -int Parser_bool (int *b, char *str) { - if (!strcasecmp (str, "no") || - !strcasecmp (str, "n") || - 
!strcasecmp (str, "off") || - !strcasecmp (str, "down") || - !strcasecmp (str, "disable")) - { - *b = 0; - return (0); - } - - if (!strcasecmp (str, "yes") || - !strcasecmp (str, "y") || - !strcasecmp (str, "on") || - !strcasecmp (str, "up") || - !strcasecmp (str, "enable")) - { - *b = 1; - return (0); - } - - return (-1); -} - int Parser_quit(int argc, char **argv) { argc = argc; diff --git a/lnet/utils/parser.h b/lnet/utils/parser.h index 44e8f2a..9e7e95a 100644 --- a/lnet/utils/parser.h +++ b/lnet/utils/parser.h @@ -64,10 +64,4 @@ char *Parser_strarg(char *inp, const char *prompt, const char *deft, /* Extracts an integer from a string with a base */ int Parser_arg2int(const char *inp, long *result, int base); -/* Convert human readable size string to and int; "1k" -> 1000 */ -int Parser_size(int *sizep, char *str); - -/* Convert a string boolean to an int; "enable" -> 1 */ -int Parser_bool(int *b, char *str); - #endif diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index fb031ae..f8107d8 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #ifdef __CYGWIN__ @@ -61,7 +61,7 @@ unsigned int portal_debug; unsigned int portal_printk; unsigned int portal_stack; -unsigned int portal_cerror; +unsigned int portal_cerror = 1; static unsigned int g_nal = 0; @@ -81,12 +81,73 @@ static name2num_t nalnames[] = { {"elan", QSWNAL}, {"gm", GMNAL}, {"ib", IBNAL}, - {"scimac", SCIMACNAL}, {NULL, -1} }; static cfg_record_cb_t g_record_cb; +/* Convert a string boolean to an int; "enable" -> 1 */ +int ptl_parse_bool (int *b, char *str) { + if (!strcasecmp (str, "no") || + !strcasecmp (str, "n") || + !strcasecmp (str, "off") || + !strcasecmp (str, "down") || + !strcasecmp (str, "disable")) + { + *b = 0; + return (0); + } + + if (!strcasecmp (str, "yes") || + !strcasecmp (str, "y") || + !strcasecmp (str, "on") || + !strcasecmp (str, "up") || + !strcasecmp (str, "enable")) + { + *b = 1; + return (0); + } + 
+ return (-1); +} + +/* Convert human readable size string to and int; "1k" -> 1000 */ +int ptl_parse_size (int *sizep, char *str) { + int size; + char mod[32]; + + switch (sscanf (str, "%d%1[gGmMkK]", &size, mod)) { + default: + return (-1); + + case 1: + *sizep = size; + return (0); + + case 2: + switch (*mod) { + case 'g': + case 'G': + *sizep = size << 30; + return (0); + + case 'm': + case 'M': + *sizep = size << 20; + return (0); + + case 'k': + case 'K': + *sizep = size << 10; + return (0); + + default: + *sizep = size; + return (0); + } + } +} + int ptl_set_cfg_record_cb(cfg_record_cb_t cb) { @@ -303,16 +364,40 @@ ptl_parse_nid (ptl_nid_t *nidp, char *str) return (-1); } +__u64 ptl_nid2u64(ptl_nid_t nid) +{ + switch (sizeof (nid)) { + case 8: + return (nid); + case 4: + return ((__u32)nid); + default: + fprintf(stderr, "Unexpected sizeof(ptl_nid_t) == %u\n", sizeof(nid)); + abort(); + /* notreached */ + return (-1); + } +} + char * ptl_nid2str (char *buffer, ptl_nid_t nid) { - __u32 addr = htonl((__u32)nid); /* back to NETWORK byte order */ - struct hostent *he = gethostbyaddr ((const char *)&addr, sizeof (addr), AF_INET); + __u64 nid64 = ptl_nid2u64(nid); + struct hostent *he; + + if ((nid64 & ~((__u64)((__u32)-1))) != 0) { + /* top bits set */ + he = NULL; + } else { + __u32 addr = htonl((__u32)nid); /* back to NETWORK byte order */ + + he = gethostbyaddr ((const char *)&addr, sizeof (addr), AF_INET); + } if (he != NULL) strcpy (buffer, he->h_name); else - sprintf (buffer, LPX64, nid); + sprintf (buffer, LPX64, nid64); return (buffer); } @@ -1008,7 +1093,7 @@ int jt_ptl_mynid(int argc, char **argv) char *nidstr; struct portals_cfg pcfg; ptl_nid_t mynid; - + if (argc > 2) { fprintf(stderr, "usage: %s [NID]\n", argv[0]); fprintf(stderr, "NID defaults to the primary IP address of the machine.\n"); @@ -1042,7 +1127,8 @@ int jt_ptl_mynid(int argc, char **argv) fprintf(stderr, "setting my NID failed: %s\n", strerror(errno)); else - printf("registered my nid 
"LPX64" (%s)\n", mynid, hostname); + printf("registered my nid "LPX64" (%s)\n", + ptl_nid2u64(mynid), hostname); return 0; } @@ -1100,7 +1186,7 @@ jt_ptl_rxmem (int argc, char **argv) if (argc > 1) { - if (Parser_size (&size, argv[1]) != 0 || size < 0) + if (ptl_parse_size (&size, argv[1]) != 0 || size < 0) { fprintf (stderr, "Can't parse size %s\n", argv[1]); return (0); @@ -1119,7 +1205,7 @@ jt_ptl_txmem (int argc, char **argv) if (argc > 1) { - if (Parser_size (&size, argv[1]) != 0 || size < 0) + if (ptl_parse_size (&size, argv[1]) != 0 || size < 0) { fprintf (stderr, "Can't parse size %s\n", argv[1]); return (0); @@ -1137,7 +1223,7 @@ jt_ptl_nagle (int argc, char **argv) if (argc > 1) { - if (Parser_bool (&enable, argv[1]) != 0) + if (ptl_parse_bool (&enable, argv[1]) != 0) { fprintf (stderr, "Can't parse boolean %s\n", argv[1]); return (-1); @@ -1260,7 +1346,8 @@ jt_ptl_del_route (int argc, char **argv) rc = pcfg_ioctl(&pcfg); if (rc != 0) { - fprintf (stderr, "NAL_CMD_DEL_ROUTE ("LPX64") failed: %s\n", nid, strerror (errno)); + fprintf (stderr, "NAL_CMD_DEL_ROUTE ("LPX64") failed: %s\n", + ptl_nid2u64(nid), strerror (errno)); return (-1); } @@ -1290,7 +1377,7 @@ jt_ptl_notify_router (int argc, char **argv) return (-1); } - if (Parser_bool (&enable, argv[2]) != 0) { + if (ptl_parse_bool (&enable, argv[2]) != 0) { fprintf (stderr, "Can't parse boolean %s\n", argv[2]); return (-1); } @@ -1322,7 +1409,7 @@ jt_ptl_notify_router (int argc, char **argv) if (rc != 0) { fprintf (stderr, "NAL_CMD_NOTIFY_ROUTER ("LPX64") failed: %s\n", - nid, strerror (errno)); + ptl_nid2u64(nid), strerror (errno)); return (-1); } @@ -1478,14 +1565,11 @@ lwt_put_string(char *ustr) static int lwt_print(FILE *f, cycles_t t0, cycles_t tlast, double mhz, int cpu, lwt_event_t *e) { - char whenstr[32]; char *where = lwt_get_string(e->lwte_where); if (where == NULL) return (-1); - sprintf(whenstr, LPD64, e->lwte_when - t0); - fprintf(f, "%#010lx %#010lx %#010lx %#010lx: %#010lx %1d %10.6f 
%10.2f %s\n", e->lwte_p1, e->lwte_p2, e->lwte_p3, e->lwte_p4, (long)e->lwte_task, cpu, (e->lwte_when - t0) / (mhz * 1000000.0), @@ -1537,6 +1621,7 @@ jt_ptl_lwt(int argc, char **argv) cycles_t tnow; struct timeval tvnow; int printed_date = 0; + int nlines = 0; FILE *f = stdout; if (argc < 2 || @@ -1686,6 +1771,12 @@ jt_ptl_lwt(int argc, char **argv) rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]); if (rc != 0) break; + + if (++nlines % 10000 == 0 && f != stdout) { + /* show some activity... */ + printf("."); + fflush (stdout); + } } tlast = next_event[cpu]->lwte_when; @@ -1699,8 +1790,10 @@ jt_ptl_lwt(int argc, char **argv) next_event[cpu] = NULL; } - if (f != stdout) + if (f != stdout) { + printf("\n"); fclose(f); + } free(events); return (0); diff --git a/lnet/utils/wirecheck.c b/lnet/utils/wirecheck.c index a73a5217..6316290 100644 --- a/lnet/utils/wirecheck.c +++ b/lnet/utils/wirecheck.c @@ -34,7 +34,7 @@ do { \ #define CHECK_MEMBER_OFFSET(s,m) \ do { \ - CHECK_VALUE(offsetof(s, m)); \ + CHECK_VALUE((int)offsetof(s, m)); \ } while (0) #define CHECK_MEMBER_SIZEOF(s,m) \ diff --git a/lustre/.cvsignore b/lustre/.cvsignore index f37e59b..07a5c92 100644 --- a/lustre/.cvsignore +++ b/lustre/.cvsignore @@ -4,9 +4,12 @@ aclocal.m4 config.log config.status config.cache +config.guess +config.sub configure Makefile -Makefile.in +autoMakefile +autoMakefile.in .deps tags TAGS @@ -15,5 +18,13 @@ cscope.files cscope.out autom4te-2.53.cache autom4te.cache -.*.o.cmd +depcomp +compile +.*.cmd .mergeinfo-* +Rules +missing +mkinstalldirs +install-sh +.depend +.tmp_versions diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 0eb3b7f..1b957a3 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -1,9 +1,59 @@ tbd Cluster File Systems, Inc. 
- * version 1.2.x - * Bug fixes - - clear page cache after eviction (2766) + * version 1.2.x + * bug fixes + - clear page cache after eviction (2766) + - don't dereference NULL peer_ni in ldlm_handle_ast_error (3258) + - don't allow unlinking open directory if it isn't empty (2904) + - handle partial page writes in filter; fix 512b direct IO (3138) + - handle page cache pages in cleanup path for 2.6 (3335) + - leave liblustre's partial write handling to filter (3274) + - choose better nal ids in liblustre (3292) + - initialize liblustre with uid/group membership (2862) + * miscellanea + - drop scimac NAL (unmaintained) tbd Cluster File Systems, Inc. + * version 1.2.2 + * bug fixes + - don't copy lvb into (possibly NULL) reply on error (2983) + - don't deref dentry after dput, don't free lvb on error (2922) + - use the kms to determine writeback rpc length (2947) + - increment oti_logcookies when osc is inactive (2948) + - update client's i_blocks count via lvb messages (2543) + - handle intent open/close of special files properly (1557) + - mount MDS with errors=remount-ro, like obdfilter (2009) + - initialize lock handle to avoid ASSERT on error cleanup (3057) + - don't use cancelling-locks' kms values (2947) + - use highest lock extent for kms, not last one (2925) + - don't dereference ERR_PTR() dentry in error handling path (3107) + - fix thread race in portals_debug_dumplog() (3122) + - create lprocfs device entries at setup instead of at attach (1519) + - common AST error handler, don't evict client on completion race (3145) + - zero nameidata in detach_mnt in 2.6 (3118) + - verify d_inode after revalidate_special is valid in 2.6 (3116) + - use lustre_put_super() to handle zconf unmounts in 2.6 (3064) + - initialize RPC timeout timer earlier for 2.6 (3219) + - don't dereference NULL reply buffer if mdc_close was never sent (2410) + - print nal/nid for unknown nid (3258) + - additional checks for oscc recovery before doing precreate (3284) + - fix
ll_extent_lock() error return code for 64-bit systems (3043) + - don't crash in mdc_close for bad permissions on open (3285) + - zero i_rdev for non-device files (3147) + - clear page->private before handing to FS, better assertion (3119) + - fix incorrect decref of invalidated dentry (2350) + - don't hold journal transaction open across create RPC (3313) + - update atime on MDS at close time (3265) + - close LDAP connection when recovering to avoid server load (3315) + - update iopen-2.6 patch with fixes from 2399,2517,2904 (3301) + - don't leak open file on MDS after open resend (3325) + - serialize filter_precreate and filter_destroy_precreated (3329) + * miscellanea + - allow default OST striping configuration per directory (1414) + - fix compilation for qswnal for 2.6 kernels (3125) + - increase maximum number of MDS request buffers for large systems + - change liblustreapi to be useful for external progs like lfsck (3098) + +2004-03-22 Cluster File Systems, Inc. * version 1.2.1 * bug fixes - fixes for glimpse AST timeouts / incorrectly 0-sized files (2818) @@ -25,6 +75,8 @@ tbd Cluster File Systems, Inc. - don't evict page beyond end of stripe extent (2925) - don't oops on a deleted current working directory (2399) - handle hard links to targets without a parent properly (2517) + - don't dereference NULL lock when racing during eviction (2867) + - don't grow lock extents when lots of conflicting locks (2919) 2004-03-04 Cluster File Systems, Inc. * version 1.2.0 diff --git a/lustre/Makefile.am b/lustre/Makefile.am deleted file mode 100644 index 2d4192e..0000000 --- a/lustre/Makefile.am +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution - -AUTOMAKE_OPTIONS = foreign - -if LINUX25 -DIRS24 = -else -DIRS24 = ptlbd -endif - -# just until things are farther along -if CRAY_PORTALS -UTILS_TESTS = -else -UTILS_TESTS = utils tests -endif - -if LIBLUSTRE -SUBDIRS = portals obdclass lov ptlrpc obdecho osc utils mdc lvfs liblustre lmv -else -SUBDIRS = lvfs portals obdclass include $(DIRS24) mds obdfilter mdc osc ost -SUBDIRS+= llite obdecho lov cobd doc scripts conf ptlrpc lmv $(UTILS_TESTS) -endif - - -if SNAPFS -SUBDIRS+= snapfs -endif - -if SMFS -SUBDIRS += smfs -endif -DIST_SUBDIRS = $(SUBDIRS) liblustre snapfs smfs -EXTRA_DIST = BUGS FDL Rules kernel_patches - -# We get the version from the spec file. -CONFIGURE_DEPENDENCIES = scripts/lustre.spec.in - -dist-hook: - find $(distdir) -name .deps | xargs rm -rf - find $(distdir) -name CVS | xargs rm -rf - -include $(top_srcdir)/Rules - -rpms: dist Makefile - rpmbuild -ta $(distdir).tar.gz - -CSTK=/tmp/checkstack -CSTKO=/tmp/checkstack.orig - -checkstack: - [ -f ${CSTK} -a ! -s ${CSTKO} ] && mv ${CSTK} ${CSTKO} || true - for i in ${SUBDIRS} portals/knals/*; do \ - MOD=$$i/`basename $$i`.o; \ - [ -f $$MOD ] && objdump -d $$MOD | perl tests/checkstack.pl; \ - done | sort -nr > ${CSTK} - [ -f ${CSTKO} ] && ! 
diff -u ${CSTKO} ${CSTK} || head -30 ${CSTK} - -checkstack-update: - [ -f ${CSTK} ] && mv ${CSTK} ${CSTKO} - -checkstack-clean: - rm -f ${CSTK} ${CSTKO} diff --git a/lustre/Makefile.in b/lustre/Makefile.in new file mode 100644 index 0000000..ad621aa --- /dev/null +++ b/lustre/Makefile.in @@ -0,0 +1,27 @@ +subdir-m += portals + +@LDISKFS_TRUE@subdir-m += ldiskfs + +subdir-m += lvfs +subdir-m += obdclass +subdir-m += lov +subdir-m += lmv +subdir-m += ptlrpc +subdir-m += obdecho +subdir-m += osc +subdir-m += mdc +subdir-m += mds +subdir-m += obdfilter +subdir-m += ost +subdir-m += llite +subdir-m += cobd + +ifeq ($(PATCHLEVEL),4) +subdir-m += ptlbd +endif # PATCHLEVEL = 4 + +@SNAPFS_TRUE@subdir-m += snapfs +@SMFS_TRUE@subdir-m += smfs +@SMFS_TRUE@subdir-m += cmobd + +@INCLUDE_RULES@ diff --git a/lustre/Makefile.mk b/lustre/Makefile.mk index 9e1a9dd..6b1aeca 100644 --- a/lustre/Makefile.mk +++ b/lustre/Makefile.mk @@ -14,6 +14,7 @@ obj-y += obdecho/ obj-y += osc/ obj-y += ost/ obj-y += lov/ +obj-y += lmv/ obj-y += llite/ obj-y += lvfs/ diff --git a/lustre/README.kernel-source b/lustre/README.kernel-source new file mode 100644 index 0000000..0b88efe1 --- /dev/null +++ b/lustre/README.kernel-source @@ -0,0 +1,74 @@ +Lustre can currently build against Red Hat 2.4-style kernel-source +RPMs. All other kernel-source RPMs are *unsupported* at this time. + +Note that a Lustre-patched kernel is required for building Lustre; in +most cases a kernel-source RPM from your Linux vendor will not contain +the necessary patches. + +1. kernel.h + +Building against a kernel-source RPM requires a special header. On +Red Hat systems, this file should be automatically created at boot +time, and saved in /boot/kernel.h. + + *** If you are not running Red Hat Linux, or are not booted into the + *** kernel you are trying to build against, you need to create this + *** file manually. + + *** If you do not, the Lustre build may fail, or may fail to build + *** modules that work with your kernel. 
+ +Here is an example /boot/kernel.h file. If you are building on +x86_64, the first defines should be __MODULE_KERNEL_x86_64, etc. The +other defines should be simple to figure out. + +/* This file is automatically generated at boot time. */ +#ifndef __BOOT_KERNEL_H_ +#define __BOOT_KERNEL_H_ + +/* Kernel type i686-smp */ + +#ifndef __MODULE_KERNEL_i686 +#define __MODULE_KERNEL_i686 1 +#endif + +#ifndef __BOOT_KERNEL_ENTERPRISE +#define __BOOT_KERNEL_ENTERPRISE 0 +#endif + +#ifndef __BOOT_KERNEL_BIGMEM +#define __BOOT_KERNEL_BIGMEM 0 +#endif + +#ifndef __BOOT_KERNEL_HUGEMEM +#define __BOOT_KERNEL_HUGEMEM 0 +#endif + +#ifndef __BOOT_KERNEL_SMP +#define __BOOT_KERNEL_SMP 1 +#endif + +#ifndef __BOOT_KERNEL_UP +#define __BOOT_KERNEL_UP 0 +#endif + +#endif + +You should save this somewhere, and pass the location of this file to +./configure using the --with-kernel-source-header option. + +2. .config + +You will also need to tell Lustre about the .config file for your +kernel. The two likely locations of this file are +/boot/config-$(uname -r), and /usr/src/linux-2.4/configs/. You should +pass the location of this file to Lustre using the --with-linux-config +option. + +3. An Example + +Here is an example for configuring Lustre: + +./configure --with-linux=/usr/src/linux-2.4.20-28.9_lustre.1.0.3 \ +--with-kernel-source-header=/boot/kernel.h \ +--with-linux-config=/boot/config-2.4.20-28.9_lustre.1.0.3smp diff --git a/lustre/Rules b/lustre/Rules deleted file mode 100644 index 8846e3b..0000000 --- a/lustre/Rules +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -# Build a kernel module, name.o, and install it in $(moduledir) by: -# MODULE = name -# module_DATA = name.o -# EXTRA_PROGRAMS = name -# name_SOURCES = my.c files.c -# include $(top_srcdir)/Rules - -if LINUX25 - -# FIXME -# need to be rewritten: -# - bad hacking in lvfs/Makefile.am obdclass/Makefile.am -# - .o -> .ko -# -basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g' | sed -e 's/^.*\///g') -AM_CPPFLAGS=-I$(top_builddir)/include -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -mpreferred-stack-boundary=2 -DKBUILD_MODNAME=$(MODULE) -DKBUILD_BASENAME=$(basename) - -$(MODULE).o: $($(MODULE)_OBJECTS) $($(MODULE)_DEPENDENCIES) - $(LD) -m $(MOD_LINK) -r -o $(MODULE)_tmp.o $($(MODULE)_OBJECTS) - rm -f $(MODULE)_tmp.c - $(LINUX)/scripts/modpost $(LINUX)/vmlinux $(MODULE)_tmp.o - $(COMPILE) -UKBUILD_BASENAME -DKBUILD_BASENAME=$(MODULE) -c $(MODULE)_tmp.mod.c - $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $(MODULE)_tmp.o $(MODULE)_tmp.mod.o - -else - -AM_CPPFLAGS=-I$(top_builddir)/include -$(MODULE).o: $($(MODULE)_OBJECTS) $($(MODULE)_DEPENDENCIES) - $(LD) -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r -o $(MODULE).o $($(MODULE)_OBJECTS) - -endif - - -tags: - rm -f $(top_srcdir)/TAGS - ETAGSF=`etags --version | grep -iq exuberant && \ - echo "-I __initdata,__exitdata,EXPORT_SYMBOL"`; \ - find $(top_srcdir) -name '*.[hc]' | xargs etags $$ETAGSF -a - - rm -f $(top_srcdir)/tags - CTAGSF=`ctags --version | grep -iq exuberant && \ - echo "-I __initdata,__exitdata,EXPORT_SYMBOL"`; \ - find $(top_srcdir) -name '*.[hc]' | xargs ctags $$CTAGSF -a - diff --git a/lustre/Rules.in b/lustre/Rules.in new file mode 100644 index 0000000..293ff3c --- /dev/null +++ b/lustre/Rules.in @@ -0,0 +1,46 @@ +# Directories building kernel modules should have two files: +# +# Makefile.in: +# +# MODULES := +# -objs := file1.o file2.o file3.o +# @INCLUDE_RULES@ +# +# and 
autoMakefile.am: +# +# if LIBLUSTRE +# +# endif +# +# if MODULES +# modulefs_DATA = $(KMODEXT) +# endif +# +# DIST_SOURCES = $(-objs:.o=.c) +# MOSTLYCLEANFILES = *.o *.ko *.mod.c + +ifeq ($(PATCHLEVEL),) + +include autoMakefile + +else + +include @LINUX_CONFIG@ + +EXTRA_CFLAGS := $(EXTRA_PRE_CFLAGS) +EXTRA_CFLAGS += @EXTRA_KCFLAGS@ @UML_CFLAGS@ +EXTRA_CFLAGS += $(EXTRA_POST_CFLAGS) + +obj-m := $(patsubst %,%.o,$(MODULES)) + +ifeq ($(PATCHLEVEL),4) +# 2.4 rules +O_TARGET := $(firstword $(obj-m)) +obj-y := $($(firstword $(MODULES))-objs) +export-objs := $(obj-y) $(filter-out $(O_TARGET),$(obj-m)) +include $(TOPDIR)/Rules.make +$(MODINCL)/%.ver: %.c + @true +endif # PATCHLEVEL + +endif # KERNELRELEASE diff --git a/lustre/autoMakefile.am b/lustre/autoMakefile.am new file mode 100644 index 0000000..c5d668d --- /dev/null +++ b/lustre/autoMakefile.am @@ -0,0 +1,91 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +AUTOMAKE_OPTIONS = foreign + +SUBDIRS = . 
include portals ldiskfs lvfs obdclass lov ldlm ptlrpc \ + obdecho osc mdc mds obdfilter ost llite cobd ptlbd snapfs smfs cmobd \ + liblustre doc utils tests conf scripts + +EXTRA_DIST = BUGS FDL Rules.in kernel_patches kernel-tests/Makefile \ + README.kernel-source + +# these empty rules are needed so that automake doesn't add its own +# recursive rules +etags-recursive: + +ctags-recursive: + +tags-recursive: + +TAGS: + +tags: + rm -f $(top_srcdir)/TAGS + ETAGSF=`etags --version | grep -iq exuberant && \ + echo "-I __initdata,__exitdata,EXPORT_SYMBOL"`; \ + find $(top_srcdir) -name '*.[hc]' | xargs etags $$ETAGSF -a + + rm -f $(top_srcdir)/tags + CTAGSF=`ctags --version | grep -iq exuberant && \ + echo "-I __initdata,__exitdata,EXPORT_SYMBOL"`; \ + find $(top_srcdir) -name '*.[hc]' | xargs ctags $$CTAGSF -a + +if MODULES +all-am: modules + +if !LINUX25 +DEP = dep +dep: .depend + +.depend: + $(MAKE) $(ARCH_UM) -C $(LINUX) -f $(PWD)/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$(LINUX_CONFIG) -o scripts -o include/config/MARKER _sfdep_$(PWD) _FASTDEP_ALL_SUB_DIRS="$(PWD)" +endif + +if LDISKFS +LDISKFS = ldiskfs-sources +ldiskfs-sources: + $(MAKE) sources -C ldiskfs +endif + +lvfs-sources: + $(MAKE) sources -C lvfs + +modules: lustre_build_version $(DEP) $(LDISKFS) lvfs-sources + $(MAKE) $(ARCH_UM) -C $(LINUX) -f $(PWD)/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$(LINUX_CONFIG) SUBDIRS=$(PWD) -o tmp_include_depends -o scripts -o include/config/MARKER $@ + +lustre_build_version: + perl $(top_builddir)/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver + echo "#define LUSTRE_RELEASE @RELEASE@" >> tmpver + cmp -s $(top_builddir)/include/linux/lustre_build_version.h tmpver \ + 2> /dev/null && \ + $(RM) tmpver || \ + mv tmpver $(top_builddir)/include/linux/lustre_build_version.h + +endif # MODULES + +dist-hook: + find $(distdir) -name .deps | xargs rm -rf + find $(distdir) -name CVS | xargs rm -rf + +rpms: dist Makefile + rpmbuild -ta $(distdir).tar.gz + 
+CSTK=/tmp/checkstack +CSTKO=/tmp/checkstack.orig + +checkstack: + [ -f ${CSTK} -a ! -s ${CSTKO} ] && mv ${CSTK} ${CSTKO} || true + for i in ${SUBDIRS} portals/knals/*; do \ + MOD=$$i/`basename $$i`.o; \ + [ -f $$MOD ] && objdump -d $$MOD | perl tests/checkstack.pl; \ + done | sort -nr > ${CSTK} + [ -f ${CSTKO} ] && ! diff -u ${CSTKO} ${CSTK} || head -30 ${CSTK} + +checkstack-update: + [ -f ${CSTK} ] && mv ${CSTK} ${CSTKO} + +checkstack-clean: + rm -f ${CSTK} ${CSTKO} diff --git a/lustre/autogen.sh b/lustre/autogen.sh index be0d42d..004852e 100644 --- a/lustre/autogen.sh +++ b/lustre/autogen.sh @@ -1,5 +1,88 @@ -#!/bin/sh +#!/bin/bash + +# taken from gnome-common/macros2/autogen.sh +compare_versions() { + ch_min_version=$1 + ch_actual_version=$2 + ch_status=0 + IFS="${IFS= }"; ch_save_IFS="$IFS"; IFS="." + set $ch_actual_version + for ch_min in $ch_min_version; do + ch_cur=`echo $1 | sed 's/[^0-9].*$//'`; shift # remove letter suffixes + if [ -z "$ch_min" ]; then break; fi + if [ -z "$ch_cur" ]; then ch_status=1; break; fi + if [ $ch_cur -gt $ch_min ]; then break; fi + if [ $ch_cur -lt $ch_min ]; then ch_status=1; break; fi + done + IFS="$ch_save_IFS" + return $ch_status +} + +error_msg() { + echo "$cmd is $1. version $required is required to build Lustre." + + if [ -e /usr/lib/autolustre/bin/$cmd ]; then + cat >&2 <<-EOF + You apparently already have Lustre-specific autoconf/make RPMs + installed on your system at /usr/lib/autolustre/share/$cmd. + Please set your PATH to point to those versions: + + export PATH="/usr/lib/autolustre/bin:\$PATH" + EOF + else + cat >&2 <<-EOF + CFS provides RPMs which can be installed alongside your + existing autoconf/make RPMs, if you are nervous about + upgrading. 
See + + ftp://ftp.lustre.org/pub/other/autolustre/README.autolustre + + You may be able to download newer version from: + + http://ftp.gnu.org/gnu/$cmd/$cmd-$required.tar.gz + EOF + fi + [ "$cmd" = "autoconf" -a "$required" = "2.57" ] && cat >&2 <&2 </dev/null ; then + error_msg "missing" + fi + version=$($cmd --version | awk "BEGIN { IGNORECASE=1 } /$cmd \(GNU $cmd\)/ { print \$4 }") + echo "found $version" + if ! compare_versions "$required" "$version" ; then + error_msg "too old" + fi +} + +check_version automake "1.7.8" +check_version autoconf "2.57" +echo "Running aclocal..." +aclocal +echo "Running autoheader..." +autoheader +echo "Running automake..." +automake -a -c +echo "Running autoconf..." +autoconf -aclocal && -${AUTOMAKE:-automake} --add-missing && -${AUTOCONF:-autoconf} diff --git a/lustre/cmobd/.cvsignore b/lustre/cmobd/.cvsignore new file mode 100644 index 0000000..d5103fa --- /dev/null +++ b/lustre/cmobd/.cvsignore @@ -0,0 +1,15 @@ +.Xrefs +config.log +config.status +configure +Makefile +.deps +TAGS +.*.cmd +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.o.flags +.tmp_versions +.depend diff --git a/lustre/cmobd/Makefile.in b/lustre/cmobd/Makefile.in new file mode 100644 index 0000000..8c16488 --- /dev/null +++ b/lustre/cmobd/Makefile.in @@ -0,0 +1,4 @@ +MODULES := cmobd +cmobd-objs := cache_manager_obd.o cmobd_reint.o cmobd_write.o +cmobd-objs += cmobd_oss_reint.o cmobd_mds_reint.o lproc_cm.o +@INCLUDE_RULES@ diff --git a/lustre/cmobd/Makefile.mk b/lustre/cmobd/Makefile.mk new file mode 100644 index 0000000..bcc81ec --- /dev/null +++ b/lustre/cmobd/Makefile.mk @@ -0,0 +1,11 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. 
+# See the file COPYING in this distribution + +include $(src)/../portals/Kernelenv + +obj-y += cmobd.o +cmobd-objs := cache_manager_obd.o cmobd_reint.o cmobd_write.o \ + cmobd_oss_reint.o cmobd_mds_reint.o lproc_cm.o + diff --git a/lnet/include/linux/Makefile.am b/lustre/cmobd/autoMakefile.am similarity index 53% rename from lnet/include/linux/Makefile.am rename to lustre/cmobd/autoMakefile.am index 6a65cb5..35dd6ef 100644 --- a/lnet/include/linux/Makefile.am +++ b/lustre/cmobd/autoMakefile.am @@ -3,8 +3,9 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution -include $(top_srcdir)/Rules +if MODULES +modulefs_DATA = cmobd$(KMODEXT) +endif -linuxincludedir = $(includedir)/linux - -linuxinclude_HEADERS=kp30.h portals_lib.h +MOSTLYCLEANFILES = *.o *.ko *.mod.c +DIST_SOURCES = $(cmobd-objs:%.o=%.c) cmobd_internal.h diff --git a/lustre/cmobd/cache_manager_obd.c b/lustre/cmobd/cache_manager_obd.c new file mode 100644 index 0000000..692f33f --- /dev/null +++ b/lustre/cmobd/cache_manager_obd.c @@ -0,0 +1,215 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2002 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#define DEBUG_SUBSYSTEM S_CMOBD + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cmobd_internal.h" + +static int cmobd_attach(struct obd_device *obd, obd_count len, void *data) +{ + struct lprocfs_static_vars lvars; + + lprocfs_init_vars(cmobd, &lvars); + return lprocfs_obd_attach(obd, lvars.obd_vars); +} + +static int cmobd_detach(struct obd_device *obd) +{ + return lprocfs_obd_detach(obd); +} + +static void cmobd_find_master_client_obd(struct obd_device *obd, + struct obd_uuid *uuid) +{ + struct cache_manager_obd *cmobd = &obd->u.cmobd; + + cmobd->cm_master_obd = class_find_client_obd(NULL, OBD_LOV_DEVICENAME, + uuid); + if (cmobd->cm_master_obd == NULL) + cmobd->cm_master_obd = class_find_client_obd(NULL, + LUSTRE_MDC_NAME, + uuid); +} + +static int cmobd_setup(struct obd_device *obd, obd_count len, void *buf) +{ + struct cache_manager_obd *cmobd = &obd->u.cmobd; + struct lustre_cfg* lcfg = buf; + struct obd_uuid master_uuid, cache_uuid; + struct lustre_handle conn = { 0 }; + int rc; + ENTRY; + + if (lcfg->lcfg_inllen1 < 1 || !lcfg->lcfg_inlbuf1) { + CERROR("CMOBD setup requires master uuid\n"); + RETURN(-EINVAL); + } + if (lcfg->lcfg_inllen2 < 1 || !lcfg->lcfg_inlbuf2) { + CERROR("CMOBD setup requires cache uuid\n"); + RETURN(-EINVAL); + } + + obd_str2uuid(&master_uuid, lcfg->lcfg_inlbuf1); + obd_str2uuid(&cache_uuid, lcfg->lcfg_inlbuf2); + + cmobd_find_master_client_obd(obd, &master_uuid); + if (cmobd->cm_master_obd == NULL) { + CERROR("Can't find master obd %s\n", &master_uuid.uuid[0]); + RETURN(-EINVAL); + } + cmobd->cm_cache_obd = class_uuid2obd(&cache_uuid); + if (cmobd->cm_cache_obd == NULL) { + CERROR("Can't find cache obd %s\n", &cache_uuid.uuid[0]); + RETURN(-EINVAL); + } + + /* master lov connects to master ost here */ + rc = obd_connect(&conn, cmobd->cm_master_obd, &obd->obd_uuid); + if (rc) + RETURN(rc); + cmobd->cm_master_exp = class_conn2export(&conn); + + 
memset(&conn, 0, sizeof(conn)); + rc = class_connect(&conn, cmobd->cm_cache_obd, &obd->obd_uuid); + if (rc) + GOTO(put_master, rc); + cmobd->cm_cache_exp = class_conn2export(&conn); + if (!strcmp(cmobd->cm_master_obd->obd_type->typ_name, + OBD_LOV_DEVICENAME)){ + /* for master osc remove the recovery flag of oscc */ + rc = obd_set_info(cmobd->cm_master_exp, strlen("unrecovery"), + "unrecovery", 0, NULL); + if (rc) + GOTO(put_master, rc); + + rc = cmobd_init_write_srv(obd); + if (rc) + GOTO(put_cache, rc); + + } else { + /*FIXME later temp fix here + *Assumation:cache mds only have one ost*/ + cmobd->cm_master_obd->u.cli.cl_max_mds_easize = + lov_mds_md_size(1); + } + /* start n threads for write replay */ + RETURN(0); +put_cache: + class_disconnect(cmobd->cm_cache_exp, 0); +put_master: + obd_disconnect(cmobd->cm_master_exp, 0); + RETURN(rc); +} + +static int cmobd_cleanup(struct obd_device *obd, int flags) +{ + struct cache_manager_obd *cmobd = &obd->u.cmobd; + ENTRY; + + if (!strcmp(cmobd->cm_master_obd->obd_type->typ_name, + OBD_LOV_DEVICENAME)) { + cmobd_cleanup_write_srv(obd); + } + class_disconnect(cmobd->cm_cache_exp, 0); + obd_disconnect(cmobd->cm_master_exp, 0); + + RETURN(0); +} + +static int cmobd_iocontrol(unsigned int cmd, struct obd_export *exp, int len, + void *karg, void *uarg) +{ + struct obd_device *obd = exp->exp_obd; + int rc = 0; + ENTRY; + + switch (cmd) { + case OBD_IOC_CMOBD_SYNC: /* trigger reintegration */ + rc = cmobd_reintegrate(obd); + break; + default: + CERROR("unrecognized ioctl %#x\n", cmd); + rc = -EINVAL; + break; + } + + RETURN(rc); +} + +static struct obd_ops cmobd_ops = { + o_owner: THIS_MODULE, + o_attach: cmobd_attach, + o_detach: cmobd_detach, + o_setup: cmobd_setup, + o_cleanup: cmobd_cleanup, + o_iocontrol: cmobd_iocontrol, +}; + +kmem_cache_t *cmobd_extent_slab; + +static int __init cmobd_init(void) +{ + struct lprocfs_static_vars lvars; + int rc; + ENTRY; + + printk(KERN_INFO "Lustre: Cache Manager OBD driver; 
info@clusterfs.com\n"); + + lprocfs_init_vars(cmobd, &lvars); + rc = class_register_type(&cmobd_ops, NULL, lvars.module_vars, + LUSTRE_CMOBD_NAME); + if (rc) + RETURN(rc); + cmobd_extent_slab = kmem_cache_create("cmobd_extents", + sizeof(struct cmobd_extent_info), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (cmobd_extent_slab == NULL) { + class_unregister_type(LUSTRE_CMOBD_NAME); + RETURN(-ENOMEM); + } + RETURN(0); +} + +static void /*__exit*/ cmobd_exit(void) +{ + class_unregister_type(LUSTRE_CMOBD_NAME); + if (kmem_cache_destroy(cmobd_extent_slab) != 0) + CERROR("couldn't free cmobd extent slab\n"); +} + +MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_DESCRIPTION("Lustre Cache Manager OBD driver"); +MODULE_LICENSE("GPL"); + +module_init(cmobd_init); +module_exit(cmobd_exit); diff --git a/lustre/cmobd/cmobd_internal.h b/lustre/cmobd/cmobd_internal.h new file mode 100644 index 0000000..d245015 --- /dev/null +++ b/lustre/cmobd/cmobd_internal.h @@ -0,0 +1,28 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2003 Cluster File Systems, Inc. + * + * This code is issued under the GNU General Public License. 
+ * See the file COPYING in this distribution + */ + +#ifndef CMOBD_INTERNAL_H +#define CMOBD_INTERNAL_H + +/* cmobd_reint.c */ +int cmobd_reintegrate(struct obd_device *); +int cmobd_dummy_lsm(struct lov_stripe_md **, int, struct obdo*, __u32); +void cmobd_free_lsm(struct lov_stripe_md **); + +/* cmobd_write.c */ +int cmobd_replay_write(struct obd_device *, struct obdo*, struct ldlm_extent *); +int cmobd_init_write_srv(struct obd_device *); +void cmobd_cleanup_write_srv(struct obd_device *); + +int cmobd_reint_mds(struct obd_device*, void *record); +int cmobd_reint_setattr(struct obd_device *obd, void *rec); +int cmobd_reint_create(struct obd_device *obd, void *rec); +int cmobd_reint_write(struct obd_device *obd, void *rec); + +#endif /* CMOBD_INTERNAL_H */ diff --git a/lustre/cmobd/cmobd_mds_reint.c b/lustre/cmobd/cmobd_mds_reint.c new file mode 100644 index 0000000..0398ce3 --- /dev/null +++ b/lustre/cmobd/cmobd_mds_reint.c @@ -0,0 +1,185 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001-2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.sf.net/projects/lustre/ + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */
+
+#define DEBUG_SUBSYSTEM S_CMOBD
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "cmobd_internal.h"
+
+/* If mdc_setattr is called with an 'iattr', then it is a normal RPC that
+ * should take the normal semaphore and go to the normal portal.
+ *
+ * If it is called with iattr->ia_valid & ATTR_FROM_OPEN, then it is a
+ * magic open-path setattr that should take the setattr semaphore and
+ * go to the setattr portal.
+ *
+ * Replays a recorded setattr: the request already carries the packed
+ * mds_rec_setattr in buffer 0; only the reply size and portal are set here.
+ * Returns 0 on success (-ERESTARTSYS from mdc_reint is mapped to 0),
+ * -EINVAL if the record buffer is missing, or the mdc_reint() error. */
+int cmobd_setattr_reint(struct obd_device *obd, struct ptlrpc_request *req)
+{
+        struct mds_rec_setattr *rec;
+        int size[1], rc = 0;
+
+        ENTRY;
+
+        rec = (struct mds_rec_setattr *)lustre_msg_buf(req->rq_reqmsg, 0, 0);
+        if (!rec)
+                RETURN (-EINVAL);
+        if (rec->sa_valid & ATTR_FROM_OPEN)
+                req->rq_request_portal = MDS_SETATTR_PORTAL; //XXX FIXME bug 249
+
+        if (rec->sa_valid & (ATTR_MTIME | ATTR_CTIME))
+                CDEBUG(D_INODE, "setting mtime %lu, ctime %lu\n",
+                       LTIME_S(((time_t)rec->sa_mtime)),
+                       LTIME_S(((time_t)rec->sa_ctime)));
+
+        size[0] = sizeof(struct mds_body);
+        req->rq_replen = lustre_msg_size(1, size);
+
+        rc = mdc_reint(req, NULL, LUSTRE_IMP_FULL);
+
+        if (rc == -ERESTARTSYS)
+                rc = 0;
+
+        RETURN(rc);
+}
+
+/* Replays a recorded create.  The request is re-sent at LUSTRE_IMP_RECOVER
+ * level for as long as mdc_reint() returns -ERESTARTSYS; on success the
+ * inode generation from the reply is stored. */
+int cmobd_create_reint(struct obd_device *obd, struct ptlrpc_request *req)
+{
+        int rc = 0, level, size[1];
+        ENTRY;
+
+        size[0] = sizeof(struct mds_body);
+        req->rq_replen = lustre_msg_size(1, size);
+
+        level = LUSTRE_IMP_FULL;
+ resend:
+        rc = mdc_reint(req, NULL, level);
+        /* Resend if we were told to. */
+        if (rc == -ERESTARTSYS) {
+                level = LUSTRE_IMP_RECOVER;
+                goto resend;
+        }
+
+        if (!rc)
+                mdc_store_inode_generation(NULL, req, 0, 0);
+
+        RETURN(rc);
+}
+
+/* Replays a recorded unlink.  The reply is sized for an mds_body plus the
+ * client's maximum EA and cookie sizes.  -ERESTARTSYS is mapped to 0. */
+int cmobd_unlink_reint(struct obd_device *obd, struct ptlrpc_request *req)
+{
+        int rc = 0, size[3];
+        ENTRY;
+
+        size[0] = sizeof(struct mds_body);
+        size[1] = obd->u.cli.cl_max_mds_easize;
+        size[2] = obd->u.cli.cl_max_mds_cookiesize;
+        req->rq_replen = lustre_msg_size(3, size);
+
+        rc = mdc_reint(req, NULL, LUSTRE_IMP_FULL);
+        if (rc == -ERESTARTSYS)
+                rc = 0;
+        RETURN(rc);
+}
+
+/* Replays a recorded link.  -ERESTARTSYS is mapped to 0. */
+int cmobd_link_reint(struct obd_device *obd, struct ptlrpc_request *req)
+{
+        int rc = 0, size[1];
+        ENTRY;
+
+        size[0] = sizeof(struct mds_body);
+        req->rq_replen = lustre_msg_size(1, size);
+
+        rc = mdc_reint(req, NULL, LUSTRE_IMP_FULL);
+        if (rc == -ERESTARTSYS)
+                rc = 0;
+
+        RETURN(rc);
+}
+
+/* Replays a recorded rename.  The reply is sized for an mds_body plus the
+ * client's maximum EA size.  -ERESTARTSYS is mapped to 0. */
+int cmobd_rename_reint(struct obd_device *obd, struct ptlrpc_request *req)
+{
+        int rc = 0, size[2];
+        ENTRY;
+
+        size[0] = sizeof(struct mds_body);
+        size[1] = obd->u.cli.cl_max_mds_easize;
+        req->rq_replen = lustre_msg_size(2, size);
+
+        rc = mdc_reint(req, NULL, LUSTRE_IMP_FULL);
+        if (rc == -ERESTARTSYS)
+                rc = 0;
+
+        RETURN(rc);
+}
+
+typedef int (*cmobd_reint_mds_rec)(struct obd_device*,
+                                   struct ptlrpc_request *req);
+
+/* Per-opcode replay handlers.  Slots for opcodes that are not listed stay
+ * NULL and must be rejected by the dispatcher. */
+static cmobd_reint_mds_rec cmobd_mds_reint[REINT_MAX + 1] = {
+        [REINT_SETATTR] = cmobd_setattr_reint,
+        [REINT_CREATE]  = cmobd_create_reint,
+        [REINT_LINK]    = cmobd_link_reint,
+        [REINT_UNLINK]  = cmobd_unlink_reint,
+        [REINT_RENAME]  = cmobd_rename_reint,
+};
+
+/* Replay one MDS reintegration record against the master MDS.
+ *
+ * @record points at a struct mds_kml_pack_info immediately followed by the
+ * packed lustre_msg that was logged.  A new MDS_REINT request is prepared
+ * with the recorded buffer layout, the logged message is copied into it and
+ * the request is handed to the handler registered for the reint opcode
+ * found in message buffer 0.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be allocated,
+ * -EINVAL if the opcode is missing, out of range or has no handler, or the
+ * handler's error code. */
+int cmobd_reint_mds(struct obd_device *obd, void* record)
+{
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct mds_kml_pack_info *mkpi = (struct mds_kml_pack_info *)record;
+        struct ptlrpc_request *req;
+        struct lustre_msg *msg;
+        cmobd_reint_mds_rec handler;
+        __u32 *opcodep;
+        __u32 opcode;
+        int rc = 0;
+        ENTRY;
+
+        req = ptlrpc_prep_req(class_exp2cliimp(cmobd->cm_master_exp),
+                              MDS_REINT, mkpi->mpi_bufcount, mkpi->mpi_size,
+                              NULL);
+        if (req == NULL)
+                RETURN(-ENOMEM);
+
+        /* the logged message follows the pack info header; step over it
+         * through a char pointer instead of arithmetic on void * */
+        msg = (struct lustre_msg *)((char *)record + sizeof(*mkpi));
+
+        opcodep = lustre_msg_buf(msg, 0, 0);
+        if (opcodep == NULL) {
+                CERROR("Missing reint opcode buffer in cmobd mds reint\n");
+                GOTO(out, rc = -EINVAL);
+        }
+        opcode = *opcodep;
+
+        /* opcode is unsigned, so only 0 and values above REINT_MAX are out
+         * of range; in-range opcodes may still have no replay handler */
+        if (opcode == 0 || opcode > REINT_MAX) {
+                CERROR("Unrecorgnized reint opcode %u in cmobd mds reint\n",
+                       opcode);
+                GOTO(out, rc = -EINVAL);
+        }
+        handler = cmobd_mds_reint[opcode];
+        if (handler == NULL) {
+                CERROR("No replay handler for reint opcode %u\n", opcode);
+                GOTO(out, rc = -EINVAL);
+        }
+
+        memcpy(req->rq_reqmsg, msg, mkpi->mpi_total_size);
+        /* The copy overwrites the opc and flags that ptlrpc_prep_req() set,
+         * so restore them here.
+         * FIXME maybe some flags should be set for the reint process. */
+        req->rq_reqmsg->opc = MDS_REINT;
+        req->rq_reqmsg->flags = 0;
+
+        rc = handler(cmobd->cm_master_obd, req);
+out:
+        ptlrpc_req_finished(req);
+        RETURN(rc);
+}
+
diff --git a/lustre/cmobd/cmobd_oss_reint.c b/lustre/cmobd/cmobd_oss_reint.c
new file mode 100644
index 0000000..41d5369
--- /dev/null
+++ b/lustre/cmobd/cmobd_oss_reint.c
@@ -0,0 +1,291 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_CMOBD
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "cmobd_internal.h"
+
+int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count, int pattern);
+void lov_free_memmd(struct lov_stripe_md **lsmp);
+int smfs_rec_unpack(struct smfs_proc_args *args, char *record, char **pbuf,
+                    int *opcode);
+
+/* helper functions for cmobd to construct pseudo lsm */
+
+/* Allocate a pseudo stripe md with @stripe_cnt stripes (LOV_PATTERN_CMOBD)
+ * and fill every stripe with the object id (and group, when OBD_MD_FLGROUP
+ * is valid) taken from @oa; stripe i is mapped to OST index i.
+ * Returns 0 on success or the negative lov_alloc_memmd() result.  The
+ * caller releases the lsm with cmobd_free_lsm(). */
+int cmobd_dummy_lsm(struct lov_stripe_md **lsmp, int stripe_cnt,
+                    struct obdo *oa, __u32 stripe_size)
+{
+        int i, rc;
+        ENTRY;
+
+        rc = lov_alloc_memmd(lsmp, stripe_cnt, LOV_PATTERN_CMOBD);
+        if (rc < 0)
+                RETURN(rc);
+
+        for (i = 0; i < stripe_cnt; i++) {
+                (*lsmp)->lsm_oinfo[i].loi_id = oa->o_id;
+                (*lsmp)->lsm_object_id = oa->o_id;
+                if (oa->o_valid & OBD_MD_FLGROUP) {
+                        (*lsmp)->lsm_oinfo[i].loi_gr = oa->o_gr;
+                        (*lsmp)->lsm_object_gr = oa->o_gr;
+                }
+                (*lsmp)->lsm_oinfo[i].loi_ost_idx = i;
+                (*lsmp)->lsm_stripe_size = stripe_size;
+        }
+        RETURN(0);
+}
+
+/* Release a pseudo lsm built by cmobd_dummy_lsm(). */
+void cmobd_free_lsm(struct lov_stripe_md **lsmp)
+{
+        ENTRY;
+        lov_free_memmd(lsmp);
+        EXIT;
+}
+
+/* reintegration functions */
+
+/* Replay a recorded OST setattr: @rec is the logged struct obdo, which is
+ * applied to the master export through a pseudo lsm.
+ * Returns 0 or the error from cmobd_dummy_lsm()/obd_setattr(). */
+int cmobd_reint_setattr(struct obd_device *obd, void *rec)
+{
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct lov_obd *lov = &cmobd->cm_master_obd->u.lov;
+        struct obd_export *exp = cmobd->cm_master_exp;
+        struct lov_stripe_md *lsm;
+        struct obdo *oa = (struct obdo*)rec;
+        int rc;
+        ENTRY;
+
+        rc = cmobd_dummy_lsm(&lsm, lov->desc.ld_tgt_count, oa,
+                             (__u32)lov->desc.ld_default_stripe_size);
+        if (rc)
+                GOTO(out, rc);
+
+        rc = obd_setattr(exp, oa, lsm, NULL);
+
+        cmobd_free_lsm(&lsm);
+out:
+        RETURN(rc);
+}
+
+/* Replay a recorded OST create: @rec is the logged struct obdo.  If the
+ * object's group differs from the group last announced to the master, the
+ * new group is first pushed with the "mds_conn" set_info key.
+ * Returns 0, -EINVAL if the group could not be set, or the error from
+ * cmobd_dummy_lsm()/obd_create(). */
+int cmobd_reint_create(struct obd_device *obd, void *rec)
+{
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct lov_obd *lov = &cmobd->cm_master_obd->u.lov;
+        struct obd_export *exp = cmobd->cm_master_exp;
+        struct lov_stripe_md *lsm;
+        struct obd_trans_info oti = { 0 };
+        struct obdo *oa=(struct obdo*)rec;
+        int rc;
+        ENTRY;
+
+        rc = cmobd_dummy_lsm(&lsm, lov->desc.ld_tgt_count, oa,
+                             (__u32)lov->desc.ld_default_stripe_size);
+        if (rc)
+                GOTO(out, rc);
+        if (cmobd->cm_master_group != oa->o_gr) {
+                int group = oa->o_gr;
+                int valsize = sizeof(group);
+                rc = obd_set_info(exp, strlen("mds_conn"), "mds_conn",
+                                  valsize, &group);
+                /* the pseudo lsm is already allocated, so it must be
+                 * released on this failure path as well */
+                if (rc)
+                        GOTO(out_lsm, rc = -EINVAL);
+                cmobd->cm_master_group = oa->o_gr;
+        }
+        rc = obd_create(exp, oa, &lsm, &oti);
+
+out_lsm:
+        cmobd_free_lsm(&lsm);
+out:
+        RETURN(rc);
+}
+
+/* direct cut-n-paste of filter_blocking_ast() */
+static int cache_blocking_ast(struct ldlm_lock *lock,
+                              struct ldlm_lock_desc *desc,
+                              void *data, int flag)
+{
+        int do_ast;
+        ENTRY;
+
+        if (flag == LDLM_CB_CANCELING) {
+                /* Don't need to do anything here. */
+                RETURN(0);
+        }
+
+        /* XXX layering violation! -phil */
+        l_lock(&lock->l_resource->lr_namespace->ns_lock);
+        /* Get this: if filter_blocking_ast is racing with ldlm_intent_policy,
+         * such that filter_blocking_ast is called just before l_i_p takes the
+         * ns_lock, then by the time we get the lock, we might not be the
+         * correct blocking function anymore. So check, and return early, if
+         * so. */
+        if (lock->l_blocking_ast != cache_blocking_ast) {
+                l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+                RETURN(0);
+        }
+
+        lock->l_flags |= LDLM_FL_CBPENDING;
+        do_ast = (!lock->l_readers && !lock->l_writers);
+        l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+
+        if (do_ast) {
+                struct lustre_handle lockh;
+                int rc;
+
+                LDLM_DEBUG(lock, "already unused, calling ldlm_cli_cancel");
+                ldlm_lock2handle(lock, &lockh);
+                rc = ldlm_cli_cancel(&lockh);
+                if (rc < 0)
+                        CERROR("ldlm_cli_cancel: %d\n", rc);
+        } else {
+                LDLM_DEBUG(lock, "Lock still has references, will be "
+                           "cancelled later");
+        }
+        RETURN(0);
+}
+
+/* Blocking AST for the lock taken on the master: a blocking callback
+ * cancels the lock right away, a canceling callback is a no-op, and any
+ * other flag is a bug. */
+static int master_blocking_ast(struct ldlm_lock *lock,
+                               struct ldlm_lock_desc *desc,
+                               void *data, int flag)
+{
+        int rc;
+        struct lustre_handle lockh;
+        ENTRY;
+
+        switch (flag) {
+        case LDLM_CB_BLOCKING:
+                ldlm_lock2handle(lock, &lockh);
+                rc = ldlm_cli_cancel(&lockh);
+                if (rc < 0) {
+                        CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
+                        RETURN(rc);
+                }
+                break;
+        case LDLM_CB_CANCELING:
+                /* do nothing here by now */
+                break;
+        default:
+                LBUG();
+        }
+        RETURN(0);
+}
+
+/* Replay one written extent of object @oa from the cache to the master.
+ *
+ * Takes a PR extent lock on the cache object and a PW extent lock on the
+ * master object, replays the extent with cmobd_replay_write() and then
+ * drops both locks.  Returns the replay error if there was one, otherwise
+ * the first lock/lsm error, otherwise 0. */
+static int cmobd_write_extents(struct obd_device *obd, struct obdo *oa,
+                               struct ldlm_extent *extent)
+{
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct obd_device *cache = cmobd->cm_cache_obd;
+        struct lov_obd *lov = &cmobd->cm_master_obd->u.lov;
+        struct ldlm_res_id res_id;
+        ldlm_policy_data_t policy;
+        struct lustre_handle lockh_src = { 0 };
+        struct lustre_handle lockh_dst = { 0 };
+        struct lov_stripe_md *lsm;
+        int flags = 0, err, rc = 0;
+        ENTRY;
+
+        /* only some members are assigned below; zero the rest so no stack
+         * garbage ends up in the resource name or the lock policy */
+        memset(&res_id, 0, sizeof(res_id));
+        memset(&policy, 0, sizeof(policy));
+
+        /* XXX for debug write replay without smfs and kml */
+        res_id.name[0]= oa->o_id;
+        res_id.name[1]= oa->o_gr;
+        policy.l_extent.start = extent->start;
+        policy.l_extent.end = extent->end;
+
+        /* get extent read lock on the source replay file */
+        rc = ldlm_cli_enqueue(NULL, NULL, cache->obd_namespace, res_id,
+                              LDLM_EXTENT, &policy, LCK_PR,
+                              &flags, cache_blocking_ast, ldlm_completion_ast,
+                              NULL, NULL, NULL, 0, NULL, &lockh_src);
+        if (rc != ELDLM_OK)
+                RETURN(rc);
+
+        /* construct the pseudo lsm */
+        rc = cmobd_dummy_lsm(&lsm, lov->desc.ld_tgt_count, oa,
+                             (__u32)lov->desc.ld_default_stripe_size);
+        if (rc)
+                GOTO(out_lock, rc);
+
+        /* NOTE(review): flags still holds whatever the first enqueue
+         * returned; confirm that reusing it here is intended */
+        rc = obd_enqueue(cmobd->cm_master_exp, lsm, LDLM_EXTENT, &policy,
+                         LCK_PW, &flags, master_blocking_ast,
+                         ldlm_completion_ast, NULL,
+                         NULL, 0, NULL, &lockh_dst);
+        if (rc != ELDLM_OK)
+                GOTO(out_lsm, rc);
+
+        err = cmobd_replay_write(obd, oa, &policy.l_extent);
+
+        rc = obd_cancel(cmobd->cm_master_exp, lsm, LCK_PW, &lockh_dst);
+        if (rc == 0)
+                /* XXX in fact, I just want to cancel the only lockh_dst
+                 * instantly. */
+                rc = obd_cancel_unused(cmobd->cm_master_exp, lsm, 0, NULL);
+        /* a replay failure is reported even if dropping the lock failed */
+        if (err)
+                rc = err;
+out_lsm:
+        cmobd_free_lsm(&lsm);
+out_lock:
+        ldlm_lock_decref(&lockh_src, LCK_PR);
+        RETURN(rc);
+}
+
+/* Replay a recorded OST write: @rec is the logged struct obdo whose inline
+ * data starts with the cache inode number.  The written extents of that
+ * inode are fetched from the cache filesystem and replayed one by one;
+ * replay stops at the first failing extent.
+ * Returns 0 or the first error encountered. */
+int cmobd_reint_write(struct obd_device *obd, void *rec)
+{
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct obd_device *cache = cmobd->cm_cache_obd;
+        struct obdo *oa = (struct obdo *)rec;
+        struct ldlm_extent *extent = NULL;
+        unsigned long csb = 0, ino;
+        char *extents_buf = NULL;
+        int size = 0, rc = 0, ext_num = 0;
+        ENTRY;
+
+        size = sizeof(csb);
+
+        /* csb is used as the cache super block pointer below, so a failed
+         * lookup must not be ignored */
+        rc = obd_get_info(cmobd->cm_cache_exp, strlen("cache_sb") + 1,
+                          "cache_sb", &size, &csb);
+        if (rc) {
+                CERROR("cannot get cache_sb from cache obd: rc = %d\n", rc);
+                RETURN(rc);
+        }
+
+        ino = *(int*)(&oa->o_inline[0]);
+        rc = fsfilt_get_ino_write_extents(cache, (struct super_block *)csb, ino,
+                                          &extents_buf, &ext_num);
+        if (rc)
+                GOTO(out, rc);
+        extent = (struct ldlm_extent *)extents_buf;
+        size = ext_num;
+        while (extent && size --) {
+                rc = cmobd_write_extents(obd, oa, extent);
+                if (rc)
+                        GOTO(out, rc);
+                extent ++;
+        }
+out:
+        if (extents_buf)
+                fsfilt_free_write_extents(cache, (struct super_block *)csb, ino,
+                                          extents_buf, ext_num);
+        RETURN(rc);
+}
+
diff --git a/lustre/cmobd/cmobd_reint.c b/lustre/cmobd/cmobd_reint.c
new file mode 100644
index 0000000..f68132f
--- /dev/null
+++ b/lustre/cmobd/cmobd_reint.c
@@ -0,0 +1,119 @@
+/* -*- mode: c; c-basic-offset: 8;
indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_CMOBD
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "cmobd_internal.h"
+
+/* Hand one unpacked log record to the replay routine matching @opcode.
+ * Returns the routine's result, or -EINVAL for an opcode with no replay
+ * routine. */
+static int cmobd_reint_record(int opcode, struct obd_device *obd, char *record)
+{
+        switch (opcode) {
+        case OST_CREATE:
+                return cmobd_reint_create(obd, record);
+        case OST_SETATTR:
+                return cmobd_reint_setattr(obd, record);
+        case OST_WRITE:
+                return cmobd_reint_write(obd, record);
+        case MDS_REINT:
+                return cmobd_reint_mds(obd, record);
+        default:
+                CERROR("unrecognized format %d\n", opcode);
+                return -EINVAL;
+        }
+}
+/* llog callback: unpack one SMFS update record from a plain log and replay
+ * it on the obd passed in @data.  A successfully replayed record is
+ * reported as LLOG_DEL_RECORD; anything else returns the error code. */
+static int cmobd_reint_cb(struct llog_handle *llh, struct llog_rec_hdr *rec,
+                          void *data)
+{
+        struct obd_device *obd = (struct obd_device*)data;
+        char *payload, *unpacked;
+        int opcode, rc;
+
+        ENTRY;
+
+        if (!(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)) {
+                CERROR("log is not plain log\n");
+                RETURN(-EINVAL);
+        }
+        if (rec->lrh_type != SMFS_UPDATE_REC)
+                RETURN(-EINVAL);
+
+        /* the record payload starts right after the llog record header */
+        payload = (char *)(rec + 1);
+        rc = smfs_rec_unpack(NULL, payload, &unpacked, &opcode);
+        if (rc == 0)
+                rc = cmobd_reint_record(opcode, obd, unpacked);
+        if (rc == 0)
+                /*delete this record*/
+                rc = LLOG_DEL_RECORD;
+
+        RETURN(rc);
+}
+
+/* Replay the whole reintegration log of the cache device.  The log context
+ * is fetched from the cache export with the "reint_log" key and its already
+ * open handle is processed with cmobd_reint_cb().
+ * Returns the obd_get_info() error, -EFAULT when no log handle is
+ * available, or the llog_cat_process() result. */
+int cmobd_reintegrate(struct obd_device *obd)
+{
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct llog_ctxt *ctxt = NULL;
+        struct llog_handle *llh = NULL;
+        int val_size = sizeof(ctxt);
+        int rc;
+        ENTRY;
+
+        /* XXX just fetch the reintegration log context from
+         * cache ost directly, use logid later ?? */
+        rc = obd_get_info(cmobd->cm_cache_exp, strlen("reint_log") + 1,
+                          "reint_log", &val_size, &ctxt);
+        if (rc)
+                RETURN(rc);
+
+        /* use the already opened log handle instead of
+         * reopen a new log handle */
+        if (ctxt != NULL)
+                llh = ctxt->loc_handle;
+        if (llh == NULL)
+                RETURN(-EFAULT);
+
+        /* FIXME should we insert a LLOG_GEN_REC before process log ? */
+        rc = llog_cat_process(llh, (llog_cb_t)cmobd_reint_cb, obd);
+
+        RETURN(rc);
+}
+
+
diff --git a/lustre/cmobd/cmobd_write.c b/lustre/cmobd/cmobd_write.c
new file mode 100644
index 0000000..424ae98
--- /dev/null
+++ b/lustre/cmobd/cmobd_write.c
@@ -0,0 +1,750 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_CMOBD
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "cmobd_internal.h"
+
+extern kmem_cache_t *cmobd_extent_slab;
+
+/* helper function to split an extent: returns how many pieces of
+ * @interval bytes are needed to cover [ext->start, ext->end], rounding a
+ * partial trailing piece up */
+static obd_count split_extent(struct ldlm_extent *ext, unsigned long interval)
+{
+        obd_count buf_count, remainder;
+        ENTRY;
+
+        buf_count = ext->end - ext->start + 1;
+        LASSERT(buf_count > 0);
+
+        remainder = do_div(buf_count, interval);
+        if (remainder)
+                buf_count++;
+
+        RETURN(buf_count);
+}
+
+/* async page op: reads need no preparation; for any other command the page
+ * is try-locked and -EAGAIN is returned when it is already locked */
+static int cmobd_ap_make_ready(void *data, int cmd)
+{
+        struct cmobd_async_page *cmap = (struct cmobd_async_page *)data;
+        struct page *page = cmap->cmap_page;
+        ENTRY;
+
+        if (cmd == OBD_BRW_READ)
+                RETURN(0);
+
+        if (TryLockPage(page))
+                RETURN(-EAGAIN);
+
+        RETURN(0);
+}
+
+/* async page op: returns how many bytes of the page lie inside the file:
+ * 0 past EOF, the partial count for the page containing EOF, otherwise
+ * PAGE_SIZE */
+static int cmobd_ap_refresh_count(void *data, int cmd)
+{
+        struct cmobd_async_page *cmap = (struct cmobd_async_page *)data;
+        struct page *page = cmap->cmap_page;
+        struct inode *inode = page->mapping->host;
+        ENTRY;
+
+        LASSERT(cmd != OBD_BRW_READ);
+
+        /* catch race with truncate */
+        if (((loff_t)page->index << PAGE_SHIFT) >= inode->i_size)
+                RETURN(0);
+
+        /* catch sub-page write at end of file */
+        if (((loff_t)page->index << PAGE_SHIFT) + PAGE_SIZE > inode->i_size)
+                RETURN(inode->i_size % PAGE_SIZE);
+
+        RETURN(PAGE_SIZE);
+}
+
+/* async page op: fill @oa with the object id/group of the extent set the
+ * page belongs to plus attributes taken from the page's inode */
+static void cmobd_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
+{
+        struct cmobd_async_page *cmap = (struct cmobd_async_page *)data;
+        obd_flag valid_flags;
+        struct inode *inode;
+        ENTRY;
+
+        if (IS_ERR(cmap)) {
+                EXIT;
+                return;
+        }
+
+        inode = cmap->cmap_page->mapping->host;
+        oa->o_id = cmap->cmap_es->es_oa.o_id;
+        oa->o_gr = cmap->cmap_es->es_oa.o_gr;
+        oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
+        valid_flags = OBD_MD_FLTYPE | OBD_MD_FLATIME;
+        if (cmd == OBD_BRW_WRITE) {
+                oa->o_valid |= OBD_MD_FLIFID;
+                mdc_pack_fid(obdo_fid(oa), inode->i_ino, 0, inode->i_mode);
+
+                valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME;
+        }
+
+        obdo_from_inode(oa, inode, valid_flags);
+
+        EXIT;
+        return;
+}
+
+/* async page op: IO on the page finished.  The page is removed from its
+ * extent set, torn down, unlocked and released; the set's waiter is woken
+ * once no pages and no extents are outstanding.
+ * NOTE(review): set is still dereferenced after es_lock is dropped, while
+ * the waiter owns the set - confirm the waiter cannot return before this
+ * function is done with it. */
+static void cmobd_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
+{
+        struct cmobd_async_page *cmap = (struct cmobd_async_page *)data;
+        struct cmobd_extent_set *set = cmap->cmap_es;
+        unsigned long flags;
+        struct page *page;
+        int wakeup = 0;
+        ENTRY;
+
+        page = cmap->cmap_page;
+        LASSERT(PageLocked(page));
+
+        /* XXX */
+        if (rc)
+                SetPageError(page);
+
+        spin_lock_irqsave(&set->es_lock, flags);
+        LASSERT(!list_empty(&set->es_pages));
+        LASSERT(!list_empty(&cmap->cmap_link));
+
+        list_del_init(&cmap->cmap_link);
+        if (list_empty(&set->es_pages) && !set->es_count)
+                wakeup = 1;
+        spin_unlock_irqrestore(&set->es_lock, flags);
+
+        obd_teardown_async_page(set->es_exp, set->es_lsm, NULL,
+                                cmap->cmap_cookie);
+        OBD_FREE(cmap, sizeof(*cmap));
+
+        unlock_page(page);
+        page_cache_release(page);
+
+        if (wakeup)
+                wake_up(&set->es_waitq);
+        EXIT;
+        return;
+}
+
+static struct obd_async_page_ops cmobd_async_page_ops = {
+        .ap_make_ready = cmobd_ap_make_ready,
+        .ap_refresh_count = cmobd_ap_refresh_count,
+        .ap_fill_obdo = cmobd_ap_fill_obdo,
+        .ap_completion = cmobd_ap_completion,
+};
+
+/* Queue the @oa_bufs pages described by @lnb for writing to the master.
+ * Each page is first queued for async IO; if that fails the page is
+ * written synchronously through an io group instead.
+ * Returns 0 or the first error, after which the remaining pages are not
+ * submitted. */
+static int cmobd_send_pages(struct obd_device *obd,
+                            struct niobuf_local *lnb,
+                            obd_count oa_bufs,
+                            struct cmobd_extent_set *set)
+{
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct obd_export *exp = cmobd->cm_master_exp;
+        struct cmobd_async_page *cmap = NULL;
+        obd_count i;
+        int rc = 0;
+        unsigned long flags;
+        ENTRY;
+
+        for (i = 0; i < oa_bufs; i++, lnb++) {
+
+                OBD_ALLOC(cmap, sizeof(*cmap));
+                if (cmap == NULL) {
+                        CERROR("Not enought memory\n");
+                        rc = -ENOMEM;
+                        break;
+                }
+                INIT_LIST_HEAD(&cmap->cmap_link);
+                cmap->cmap_page = lnb->page;
+                cmap->cmap_es = set;
+
+                rc = obd_prep_async_page(exp, set->es_lsm, NULL, lnb->page,
+                                         lnb->offset, &cmobd_async_page_ops,
+                                         cmap, &cmap->cmap_cookie);
+                if (rc) {
+                        CERROR("cmobd prep async page failed page(%p) rc(%d)\n",
+                               lnb->page, rc);
+                        OBD_FREE(cmap, sizeof(*cmap));
+                        break;
+                }
+
+                LASSERT(cmap->cmap_page);
+                LASSERT(!PageLocked(cmap->cmap_page));
+                LASSERT(Page_Uptodate(cmap->cmap_page));
+                page_cache_get(cmap->cmap_page);
+
+                spin_lock_irqsave(&set->es_lock, flags);
+                list_add_tail(&cmap->cmap_link, &set->es_pages);
+                spin_unlock_irqrestore(&set->es_lock, flags);
+
+                rc = obd_queue_async_io(exp, set->es_lsm, NULL, cmap->cmap_cookie,
+                                        OBD_BRW_WRITE, 0, 0, 0, 0);
+                if (rc) {  /* try sync io */
+                        struct obd_io_group *oig;
+
+                        spin_lock_irqsave(&set->es_lock, flags);
+                        list_del_init(&cmap->cmap_link);
+                        spin_unlock_irqrestore(&set->es_lock, flags);
+
+                        lock_page(cmap->cmap_page);
+
+                        rc = oig_init(&oig);
+                        if (rc)
+                                GOTO(free_page, rc);
+
+                        rc = obd_queue_group_io(exp, set->es_lsm, NULL, oig,
+                                                cmap->cmap_cookie,
+                                                OBD_BRW_WRITE, 0, lnb->len, 0,
+                                                ASYNC_READY | ASYNC_URGENT |
+                                                ASYNC_COUNT_STABLE |
+                                                ASYNC_GROUP_SYNC);
+
+                        if (rc)
+                                GOTO(free_oig, rc);
+
+                        rc = obd_trigger_group_io(exp, set->es_lsm, NULL, oig);
+                        if (rc)
+                                GOTO(free_oig, rc);
+
+                        rc = oig_wait(oig);
+free_oig:
+                        oig_release(oig);
+free_page:
+                        unlock_page(cmap->cmap_page);
+                        page_cache_release(cmap->cmap_page);
+                        obd_teardown_async_page(exp, set->es_lsm, NULL,
+                                                cmap->cmap_cookie);
+                        OBD_FREE(cmap, sizeof(*cmap));
+                        if (rc) {
+                                CERROR("cmobd sync io failed\n");
+                                break;
+                        }
+                }
+        }
+        RETURN(rc);
+}
+
+/* Replay one queued extent: read its pages from the cache export with
+ * obd_preprw()/obd_commitrw() and send them to the master with
+ * cmobd_send_pages().
+ *
+ * The extent set's es_count is always decremented before returning, also
+ * when setup fails, because the submitter sleeps until the count drops to
+ * zero.  Returns 0 or the first error encountered. */
+static int cmobd_write_extent(struct obd_device *obd,
+                              struct cmobd_extent_info *ei)
+{
+        struct cmobd_extent_set *set = ei->ei_set;
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        unsigned long flags;
+        struct obd_ioobj ioo;
+        struct niobuf_local *lnb;
+        struct niobuf_remote *rnb;
+        obd_count i, oa_bufs;
+        struct obdo *oa;
+        obd_off offset;
+        int ret, rc = 0, wakeup = 0;
+        ENTRY;
+
+        oa_bufs = split_extent(&ei->ei_extent, PAGE_SIZE);
+        LASSERT(oa_bufs > 0);
+
+        OBD_ALLOC(lnb, oa_bufs * sizeof(struct niobuf_local));
+        OBD_ALLOC(rnb, oa_bufs * sizeof(struct niobuf_remote));
+        oa = obdo_alloc();
+
+        if (lnb == NULL || rnb == NULL || oa == NULL)
+                GOTO(out, rc = -ENOMEM);
+
+        LASSERT(ei->ei_extent.end >= ei->ei_extent.start);
+        LASSERT((ei->ei_extent.start & (PAGE_SIZE -1)) == 0);
+
+        for (i = 0, offset = ei->ei_extent.start; i < oa_bufs;
+             i++, offset += PAGE_SIZE) {
+                rnb[i].offset = offset;
+                rnb[i].len = MIN(PAGE_SIZE, ei->ei_extent.end - offset + 1);
+        }
+
+        memcpy(oa, &set->es_oa, sizeof(*oa));
+        obdo_to_ioobj(oa, &ioo);
+        ioo.ioo_bufcnt = oa_bufs;
+
+        ret = obd_preprw(OBD_BRW_READ, cmobd->cm_cache_exp, oa, 1, &ioo,
+                         oa_bufs, rnb, lnb, NULL);
+        if (ret)
+                GOTO(out, rc = ret);
+
+        rc = cmobd_send_pages(obd, lnb, oa_bufs, set);
+        if (rc)
+                CERROR("cmobd_send_pages failed %d\n", rc);
+
+        /* ret is 0 here; keep a send failure instead of letting the commit
+         * result overwrite it */
+        ret = obd_commitrw(OBD_BRW_READ, cmobd->cm_cache_exp, oa, 1, &ioo,
+                           oa_bufs, lnb, NULL, ret);
+        if (rc == 0)
+                rc = ret;
+
+out:
+        /* countdown and wake up */
+        spin_lock_irqsave(&set->es_lock, flags);
+        LASSERT(set->es_count);
+        set->es_count--;
+        if (!set->es_count)
+                wakeup = 1;
+        spin_unlock_irqrestore(&set->es_lock, flags);
+
+        if (wakeup)
+                wake_up(&set->es_waitq);
+
+        if (lnb)
+                OBD_FREE(lnb, oa_bufs * sizeof(struct niobuf_local));
+        if (rnb)
+                OBD_FREE(rnb, oa_bufs * sizeof(struct niobuf_remote));
+        if (oa)
+                obdo_free(oa);
+
+        RETURN(rc);
+}
+
+/* Dequeue the next pending extent of the write service, or return NULL if
+ * the queue is empty.  Producers are woken once the queue has room. */
+static struct cmobd_extent_info* get_next_ei(struct cmobd_write_service *ws)
+{
+        struct cmobd_extent_info *ei = NULL;
+        unsigned long flags;
+        int wakeup = 0;
+
+        spin_lock_irqsave(&ws->ws_extent_lock, flags);
+        if (!list_empty(&ws->ws_extents)) {
+                ei = list_entry(ws->ws_extents.next,
+                                struct cmobd_extent_info, ei_link);
+                list_del_init(&ei->ei_link);
+                ws->ws_nextents--;
+                if (ws->ws_nextents < CMOBD_MAX_EXTENTS)
+                        wakeup = 1;
+        }
+        spin_unlock_irqrestore(&ws->ws_extent_lock, flags);
+
+        if (wakeup)
+                wake_up_all(&ws->ws_waitq_provider);
+
+        return ei;
+}
+
+/* Main loop of a write replay thread: daemonize, announce SVC_RUNNING,
+ * then replay queued extents until SVC_STOPPING is set, and finally
+ * announce SVC_STOPPED. */
+static int cmobd_write_main(void *arg)
+{
+        struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
+        struct ptlrpc_thread *thread = data->thread;
+        struct obd_device *obd = data->dev;
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct cmobd_write_service *ws = cmobd->cm_write_srv;
+        struct cmobd_extent_info *extent = NULL;
+        unsigned long flags;
+        int rc;
+        ENTRY;
+
+        lock_kernel();
+        /* vv ptlrpc_daemonize(); vv */
+        exit_mm(current);
+
+        current->session = 1;
+        current->pgrp = 1;
+        current->tty = NULL;
+
+        exit_files(current);
+        reparent_to_init();
+        /* ^^ ptlrpc_daemonize(); ^^ */
+
+        SIGNAL_MASK_LOCK(current, flags);
+        sigfillset(&current->blocked);
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, flags);
+
+        /* both operands are size_t; cast them to match the %d conversions */
+        LASSERTF(strlen(data->name) < sizeof(current->comm),
+                 "name %d > len %d\n", (int)strlen(data->name),
+                 (int)sizeof(current->comm));
+        THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", data->name);
+
+        unlock_kernel();
+
+        /* data lives on the starter's stack; it must not be touched after
+         * the starter has been told that we are running */
+        thread->t_flags = SVC_RUNNING;
+        wake_up(&thread->t_ctl_waitq);
+
+        /* Record that the thread is running */
+        spin_lock_irqsave(&ws->ws_thread_lock, flags);
+        ws->ws_nthreads++;
+        spin_unlock_irqrestore(&ws->ws_thread_lock, flags);
+
+        while ((thread->t_flags & SVC_STOPPING) == 0) {
+                struct l_wait_info lwi = { 0 };
+
+                l_wait_event_exclusive(ws->ws_waitq_consumer,
+                                       ((thread->t_flags & SVC_STOPPING) ||
+                                        ((extent = get_next_ei(ws)) !=
+                                          NULL)),
+                                       &lwi);
+                if (extent == NULL)
+                        continue;
+                rc = cmobd_write_extent(obd, extent);
+                if (rc)
+                        CERROR("write extent failed rc=%d\n", rc);
+                OBD_SLAB_FREE(extent, cmobd_extent_slab, sizeof(*extent));
+                extent = NULL;
+        }
+
+        thread->t_flags = SVC_STOPPED;
+        wake_up(&thread->t_ctl_waitq);
+
+        spin_lock_irqsave(&ws->ws_thread_lock, flags);
+        ws->ws_nthreads--; /* must know immediately */
+        spin_unlock_irqrestore(&ws->ws_thread_lock, flags);
+
+        RETURN(0);
+}
+
+/* functions for manipulating cmobd write replay threads, similar with
+ * ptlrpc threads functions */
+
+/* Start one write replay thread called @name and wait until it reports
+ * SVC_RUNNING.  Returns 0, -ENOMEM or the kernel_thread() error. */
+static int cmobd_start_thread(struct obd_device *obd, char *name)
+{
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct cmobd_write_service *ws = cmobd->cm_write_srv;
+        struct l_wait_info lwi = { 0 };
+        struct ptlrpc_svc_data d;
+        struct ptlrpc_thread *thread;
+        unsigned long flags;
+        int rc;
+        ENTRY;
+
+        OBD_ALLOC(thread, sizeof(*thread));
+        if (thread == NULL)
+                RETURN(-ENOMEM);
+        init_waitqueue_head(&thread->t_ctl_waitq);
+
+        d.dev = obd;
+        d.svc = NULL;
+        d.name = name;
+        d.thread = thread;
+
+        spin_lock_irqsave(&ws->ws_thread_lock, flags);
+        list_add(&thread->t_link, &ws->ws_threads);
+        spin_unlock_irqrestore(&ws->ws_thread_lock, flags);
+
+        /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
+         * just drop the VM and FILES in ptlrpc_daemonize() right away.
+         */
+        rc = kernel_thread(cmobd_write_main, &d, CLONE_VM | CLONE_FILES);
+        if (rc < 0) {
+                CERROR("cannot start thread: %d\n", rc);
+                spin_lock_irqsave(&ws->ws_thread_lock, flags);
+                list_del_init(&thread->t_link);
+                spin_unlock_irqrestore(&ws->ws_thread_lock, flags);
+                OBD_FREE(thread, sizeof(*thread));
+                RETURN(rc);
+        }
+        l_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING, &lwi);
+
+        RETURN(0);
+
+}
+
+/* Ask @thread to stop, wait until it reports SVC_STOPPED, then unlink and
+ * free it. */
+static void cmobd_stop_thread(struct obd_device *obd,
+                              struct ptlrpc_thread *thread)
+{
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct cmobd_write_service *ws = cmobd->cm_write_srv;
+        struct l_wait_info lwi = { 0 };
+        unsigned long flags;
+        ENTRY;
+
+        thread->t_flags = SVC_STOPPING;
+        wake_up_all(&ws->ws_waitq_consumer);
+
+        l_wait_event(thread->t_ctl_waitq, (thread->t_flags & SVC_STOPPED),
+                     &lwi);
+
+        spin_lock_irqsave(&ws->ws_thread_lock, flags);
+        list_del(&thread->t_link);
+        spin_unlock_irqrestore(&ws->ws_thread_lock, flags);
+
+        OBD_FREE(thread, sizeof(*thread));
+        EXIT;
+}
+
+/* Stop every thread on the write service's thread list.  The list lock is
+ * dropped around each cmobd_stop_thread() call because stopping sleeps. */
+static void cmobd_stop_all_threads(struct obd_device *obd)
+{
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct cmobd_write_service *ws = cmobd->cm_write_srv;
+        unsigned long flags;
+        struct ptlrpc_thread *thread;
+        ENTRY;
+
+        spin_lock_irqsave(&ws->ws_thread_lock, flags);
+        while (!list_empty(&ws->ws_threads)) {
+                thread = list_entry(ws->ws_threads.next,
+                                    struct ptlrpc_thread, t_link);
+
+                spin_unlock_irqrestore(&ws->ws_thread_lock, flags);
+                cmobd_stop_thread(obd, thread);
+                spin_lock_irqsave(&ws->ws_thread_lock, flags);
+        }
+
+        spin_unlock_irqrestore(&ws->ws_thread_lock, flags);
+        EXIT;
+}
+
+/* Start @num_threads write replay threads named "<base_name>_NN".  On the
+ * first failure all threads started so far are stopped and that error is
+ * returned; no further threads are started. */
+static int cmobd_start_n_threads(struct obd_device *obd, int num_threads,
+                                 char *base_name)
+{
+        int i, rc = 0;
+        ENTRY;
+
+        for (i = 0; i < num_threads; i++) {
+                char name[32];
+                snprintf(name, sizeof(name) - 1, "%s_%02d", base_name, i);
+                rc = cmobd_start_thread(obd, name);
+                if (rc) {
+                        CERROR("cannot start %s thread #%d: rc %d\n", base_name,
+                               i, rc);
+                        cmobd_stop_all_threads(obd);
+                        /* without this the loop would keep starting threads
+                         * and could return 0 from a later iteration */
+                        break;
+                }
+        }
+        RETURN(rc);
+}
+
+/* Tear down the write service: stop all replay threads, drop any extents
+ * still queued and free the service descriptor. */
+void cmobd_cleanup_write_srv(struct obd_device *obd)
+{
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct list_head *pos, *n;
+        struct cmobd_extent_info *ei;
+        ENTRY;
+
+        cmobd_stop_all_threads(obd);
+
+        list_for_each_safe(pos, n, &cmobd->cm_write_srv->ws_extents) {
+                ei = list_entry(pos, struct cmobd_extent_info, ei_link);
+                list_del_init(&ei->ei_link);
+                /* extents are allocated from cmobd_extent_slab, so they
+                 * must be returned to that slab */
+                OBD_SLAB_FREE(ei, cmobd_extent_slab, sizeof(*ei));
+        }
+        OBD_FREE(cmobd->cm_write_srv, sizeof(*cmobd->cm_write_srv));
+        cmobd->cm_write_srv = NULL;
+        EXIT;
+}
+
+/* Allocate and initialise the write service and start its
+ * CMOBD_NUM_THREADS replay threads.  On failure everything is torn down
+ * again.  Returns 0, -ENOMEM or the thread start error. */
+int cmobd_init_write_srv(struct obd_device *obd)
+{
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct cmobd_write_service *ws;
+        int rc;
+        ENTRY;
+
+        OBD_ALLOC(cmobd->cm_write_srv, sizeof(*cmobd->cm_write_srv));
+        if (cmobd->cm_write_srv == NULL)
+                RETURN(-ENOMEM);
+        ws = cmobd->cm_write_srv;
+
+        INIT_LIST_HEAD(&ws->ws_threads);
+        spin_lock_init(&ws->ws_thread_lock);
+        ws->ws_nthreads = 0;
+
+        INIT_LIST_HEAD(&ws->ws_extents);
+        spin_lock_init(&ws->ws_extent_lock);
+        ws->ws_nextents = 0;
+        init_waitqueue_head(&ws->ws_waitq_provider);
+        init_waitqueue_head(&ws->ws_waitq_consumer);
+
+        rc = cmobd_start_n_threads(obd, CMOBD_NUM_THREADS, "cm_write");
+        if (rc)
+                cmobd_cleanup_write_srv(obd);
+
+        RETURN(rc);
+}
+
+/* Returns 1 when the pending extent queue holds CMOBD_MAX_EXTENTS entries
+ * or more, else 0. */
+static int extent_queue_full(struct cmobd_write_service *ws)
+{
+        unsigned long flags;
+        int full = 0;
+
+        spin_lock_irqsave(&ws->ws_extent_lock, flags);
+        full = (ws->ws_nextents >= CMOBD_MAX_EXTENTS) ? 1 : 0;
+        spin_unlock_irqrestore(&ws->ws_extent_lock, flags);
+
+        return full;
+}
+
+/* Hand extent @ex to the replay threads, sleeping while the queue is full. */
+static void cmobd_queue_extent(struct obd_device *obd,
+                               struct cmobd_extent_info *ex)
+{
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct cmobd_write_service *ws = cmobd->cm_write_srv;
+        struct cmobd_extent_set *set = ex->ei_set;
+        unsigned long flags;
+        struct l_wait_info lwi = { 0 };
+        ENTRY;
+
+        /* account for the extent before a replay thread can see it: the
+         * thread decrements es_count (and asserts it is non-zero) as soon
+         * as it has processed the extent */
+        spin_lock_irqsave(&set->es_lock, flags);
+        set->es_count++;
+        spin_unlock_irqrestore(&set->es_lock, flags);
+
+wait:
+        l_wait_event(ws->ws_waitq_provider, !extent_queue_full(ws), &lwi);
+
+        spin_lock_irqsave(&ws->ws_extent_lock, flags);
+        if (ws->ws_nextents >= CMOBD_MAX_EXTENTS) {
+                spin_unlock_irqrestore(&ws->ws_extent_lock, flags);
+                goto wait;
+        }
+        list_add_tail(&ex->ei_link, &ws->ws_extents);
+        ws->ws_nextents++;
+        spin_unlock_irqrestore(&ws->ws_extent_lock, flags);
+
+        wake_up_all(&ws->ws_waitq_consumer);
+
+        EXIT;
+}
+
+/* Returns the i_size of the object (@id, @grp) looked up on @exp.
+ * NOTE(review): only a NULL dentry is asserted against; confirm that
+ * obd_lvfs_fid2dentry() cannot return an ERR_PTR here. */
+static obd_size cmobd_fid2size(struct obd_export *exp, obd_id id, obd_gr grp)
+{
+        struct lvfs_run_ctxt saved;
+        struct dentry *de = NULL;
+        obd_size size;
+        ENTRY;
+
+        push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
+
+        de = obd_lvfs_fid2dentry(exp, id, 0, grp);
+        LASSERT(de);
+
+        size = de->d_inode->i_size;
+
+        dput(de);
+        pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
+
+        RETURN(size);
+}
+
+/* Phase 1 is done when no extents are outstanding; phase 2 additionally
+ * requires that no pages are left on the set.  Returns 1 when done. */
+static int extent_set_done(struct cmobd_extent_set *set, int phase)
+{
+        int done = 0;
+        unsigned long flags;
+
+        spin_lock_irqsave(&set->es_lock, flags);
+        if (phase == 1)
+                done = set->es_count ? 0 : 1;
+        else if (phase == 2)
+                done = (!set->es_count && list_empty(&set->es_pages)) ? 1 : 0;
+        spin_unlock_irqrestore(&set->es_lock, flags);
+
+        return done;
+}
+
+/* Replay the byte range @ext of object @oa from the cache to the master.
+ *
+ * The range is aligned down to a page boundary, clipped to the file size
+ * when its end is OBD_OBJECT_EOF, cut into pieces of CMOBD_MAX_EXTENT_SZ
+ * bytes and queued to the write replay threads.  The call returns once all
+ * pieces have been processed and all of their pages have completed.
+ *
+ * Returns 0 on success (also when there is nothing to replay), -EINVAL for
+ * an inverted extent, -ENOMEM if a piece could not be allocated, or the
+ * error from building the pseudo lsm or from obd_set_async_flags(). */
+int cmobd_replay_write(struct obd_device *obd, struct obdo *oa,
+                       struct ldlm_extent *ext)
+{
+        struct cache_manager_obd *cmobd = &obd->u.cmobd;
+        struct lov_obd *lov = &cmobd->cm_master_obd->u.lov;
+        struct lov_stripe_md *lsm = NULL;
+        struct cmobd_extent_set set;
+        struct cmobd_extent_info *ex;
+        struct l_wait_info lwi = { 0 };
+        struct list_head *pos, *n;
+        struct cmobd_async_page *cmap;
+        unsigned long flags;
+        obd_count i, buf_count;
+        obd_off start;
+        int err, rc = 0;
+        ENTRY;
+
+        rc = cmobd_dummy_lsm(&lsm, lov->desc.ld_tgt_count, oa,
+                             (__u32)lov->desc.ld_default_stripe_size);
+        if (rc)
+                RETURN(rc);
+
+        set.es_extent.start = ext->start;
+        set.es_extent.end = ext->end;
+        set.es_lsm = lsm;
+        set.es_exp = cmobd->cm_master_exp;
+        set.es_ext_sz = CMOBD_MAX_EXTENT_SZ;
+        set.es_count = 0;
+        memcpy(&set.es_oa, oa, sizeof(*oa));
+
+        INIT_LIST_HEAD(&set.es_pages);
+        spin_lock_init(&set.es_lock);
+        init_waitqueue_head(&set.es_waitq);
+
+        if (set.es_extent.end < set.es_extent.start) {
+                CDEBUG(D_HA, "illegal extent in write replay\n");
+                GOTO(out, rc = -EINVAL);
+        }
+        /* start of extent is extended to page boundaries */
+        set.es_extent.start -= set.es_extent.start & ~PAGE_MASK;
+        /* if the end of extent is EOF, set it as file size */
+        if (set.es_extent.end == OBD_OBJECT_EOF) {
+                obd_size size = cmobd_fid2size(cmobd->cm_cache_exp,
+                                               oa->o_id, oa->o_gr);
+
+                /* the extent end is unsigned: test the size itself so that
+                 * an empty file is skipped and a one byte file (end == 0)
+                 * is still replayed */
+                if (size == 0)
+                        GOTO(out, rc = 0);
+                set.es_extent.end = size - 1;
+                /* the range starts beyond EOF: nothing to replay */
+                if (set.es_extent.start > set.es_extent.end)
+                        GOTO(out, rc = 0);
+        }
+
+        buf_count = split_extent(&set.es_extent, set.es_ext_sz);
+        for (i = 0, start = set.es_extent.start; i < buf_count;
+             i++, start += set.es_ext_sz) {
+                OBD_SLAB_ALLOC(ex, cmobd_extent_slab, SLAB_NOFS, sizeof(*ex));
+                if (ex == NULL) {
+                        CERROR("not enough memory\n");
+                        /* report the failure, but still wait below for the
+                         * pieces that were already queued */
+                        rc = -ENOMEM;
+                        break;
+                }
+
+                INIT_LIST_HEAD(&ex->ei_link);
+                ex->ei_set = &set;
+                ex->ei_extent.start = start;
+                ex->ei_extent.end = start + set.es_ext_sz - 1;
+                if (ex->ei_extent.end > set.es_extent.end)
+                        ex->ei_extent.end = set.es_extent.end;
+
+                cmobd_queue_extent(obd, ex);
+        }
+
+        l_wait_event(set.es_waitq, extent_set_done(&set, 1), &lwi);
+
+        /* fire remaining ios */
+        spin_lock_irqsave(&set.es_lock, flags);
+        list_for_each_safe (pos, n, &set.es_pages) {
+                cmap = list_entry(pos, struct cmobd_async_page, cmap_link);
+
+                /* locked pages are in flight */
+                if (PageLocked(cmap->cmap_page))
+                        continue;
+
+                spin_unlock_irqrestore(&set.es_lock, flags);
+                err = obd_set_async_flags(set.es_exp, set.es_lsm, NULL,
+                                          cmap->cmap_cookie,
+                                          ASYNC_URGENT);
+                if (err) {
+                        CERROR("cmobd set async flags failed\n");
+                        if (rc == 0)
+                                rc = err;
+                }
+                spin_lock_irqsave(&set.es_lock, flags);
+                break;
+        }
+        spin_unlock_irqrestore(&set.es_lock, flags);
+
+        l_wait_event(set.es_waitq, extent_set_done(&set, 2), &lwi);
+out:
+        cmobd_free_lsm(&lsm);
+        RETURN(rc);
+}
diff --git a/lustre/cmobd/lproc_cm.c b/lustre/cmobd/lproc_cm.c
new file mode 100644
index 0000000..7951a73
--- /dev/null
+++ b/lustre/cmobd/lproc_cm.c
@@ -0,0 +1,37 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * + */ +#define DEBUG_SUBSYSTEM S_CLASS + +#include +#include + +#ifndef LPROCFS +static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; +#else + +static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; + +#endif /* LPROCFS */ + +LPROCFS_INIT_VARS(cmobd, lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/cobd/.cvsignore b/lustre/cobd/.cvsignore index e995588..642e2e6 100644 --- a/lustre/cobd/.cvsignore +++ b/lustre/cobd/.cvsignore @@ -1,3 +1,10 @@ .deps Makefile -Makefile.in +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.cmd +.*.flags +.tmp_versions +.depend diff --git a/lustre/cobd/Makefile.in b/lustre/cobd/Makefile.in new file mode 100644 index 0000000..4f10283 --- /dev/null +++ b/lustre/cobd/Makefile.in @@ -0,0 +1,4 @@ +MODULES := cobd +cobd-objs := cache_obd.o lproc_cache.o + +@INCLUDE_RULES@ diff --git a/lustre/cobd/Makefile.am b/lustre/cobd/autoMakefile.am similarity index 50% rename from lustre/cobd/Makefile.am rename to lustre/cobd/autoMakefile.am index 781c6ce..ab8b4db 100644 --- a/lustre/cobd/Makefile.am +++ b/lustre/cobd/autoMakefile.am @@ -3,13 +3,9 @@ # This code is issued under the GNU General Public License. 
# See the file COPYING in this distribution -DEFS= +if MODULES +modulefs_DATA := cobd$(KMODEXT) +endif -MODULE = cobd -modulefs_DATA = cobd.o -EXTRA_PROGRAMS = cobd -LINX= - -cobd_SOURCES = cache_obd.c lproc_cache.c $(LINX) - -include $(top_srcdir)/Rules +DIST_SOURCES = $(cobd-objs:.o=.c) +MOSTLYCLEANFILES = *.o *.ko *.mod.c diff --git a/lustre/cobd/cache_obd.c b/lustre/cobd/cache_obd.c index 0a27122..17181f3 100644 --- a/lustre/cobd/cache_obd.c +++ b/lustre/cobd/cache_obd.c @@ -27,240 +27,1184 @@ #include #include #include +#include +#include #include #include -static int cobd_attach(struct obd_device *dev, obd_count len, void *data) +static int cobd_attach(struct obd_device *obd, obd_count len, void *buf) { struct lprocfs_static_vars lvars; - + lprocfs_init_vars(cobd, &lvars); - return lprocfs_obd_attach(dev, lvars.obd_vars); + return lprocfs_obd_attach(obd, lvars.obd_vars); } -static int cobd_detach(struct obd_device *dev) +static int cobd_detach(struct obd_device *obd) { - return lprocfs_obd_detach(dev); + return lprocfs_obd_detach(obd); } -static int -cobd_setup (struct obd_device *dev, obd_count len, void *buf) +static int cobd_setup(struct obd_device *obd, obd_count len, void *buf) { struct lustre_cfg *lcfg = (struct lustre_cfg *)buf; - struct cache_obd *cobd = &dev->u.cobd; - struct obd_device *target; + struct cache_obd *cobd = &obd->u.cobd; + struct obd_device *real; struct obd_device *cache; - struct obd_uuid target_uuid; + struct obd_uuid real_uuid; struct obd_uuid cache_uuid; - struct lustre_handle target_conn = {0,}, cache_conn = {0,}; - int rc; + struct lustre_handle real_conn = {0,}, cache_conn = {0,}; + int rc; - if (lcfg->lcfg_inlbuf1 == NULL || - lcfg->lcfg_inlbuf2 == NULL) + if (lcfg->lcfg_inllen1 == 0 || lcfg->lcfg_inlbuf1 == NULL) { + CERROR("%s: setup requires real device name\n", + obd->obd_name); return (-EINVAL); + } + + real = class_name2obd(lcfg->lcfg_inlbuf1); + if (real == NULL) { + CERROR("%s: unable to find a client for real: %s\n", 
+ obd->obd_name, lcfg->lcfg_inlbuf1); + return (-EINVAL); + } - obd_str2uuid(&target_uuid, lcfg->lcfg_inlbuf1); - target = class_uuid2obd (&target_uuid); + if (lcfg->lcfg_inllen2 == 0 || lcfg->lcfg_inlbuf2 == NULL) { + CERROR("%s: setup requires cache device name\n", obd->obd_name); + return (-EINVAL); + } - obd_str2uuid(&cache_uuid, lcfg->lcfg_inlbuf2); - cache = class_uuid2obd (&cache_uuid); - if (target == NULL || - cache == NULL) + cache = class_name2obd(lcfg->lcfg_inlbuf2); + if (cache == NULL) { + CERROR("%s: unable to find a client for cache: %s\n", + obd->obd_name, lcfg->lcfg_inlbuf2); return (-EINVAL); + } /* don't bother checking attached/setup; * obd_connect() should, and it can change underneath us */ - rc = obd_connect(&target_conn, target, &target_uuid); + rc = obd_connect(&real_conn, real, &real_uuid); if (rc != 0) return (rc); - cobd->cobd_target_exp = class_conn2export(&target_conn); + cobd->cobd_real_exp = class_conn2export(&real_conn); rc = obd_connect(&cache_conn, cache, &cache_uuid); if (rc != 0) { - obd_disconnect(cobd->cobd_target_exp, 0); + obd_disconnect(cobd->cobd_real_exp, 0); return rc; } cobd->cobd_cache_exp = class_conn2export(&cache_conn); + /* set mds_num for lustre */ - return rc; + if (!strcmp(real->obd_type->typ_name, LUSTRE_MDC_NAME)) { + int mds_num; + mds_num = REAL_MDS_NUMBER; + obd_set_info(cobd->cobd_real_exp, strlen("mds_num"), + "mds_num", sizeof(mds_num), &mds_num); + mds_num = CACHE_MDS_NUMBER; + obd_set_info(cobd->cobd_cache_exp, strlen("mds_num"), + "mds_num", sizeof(mds_num), &mds_num); + } + /*default write to real obd*/ + cobd->cache_on = 1; + return 0; } -static int cobd_cleanup(struct obd_device *dev, int flags) +static int cobd_cleanup(struct obd_device *obd, int flags) { - struct cache_obd *cobd = &dev->u.cobd; + struct cache_obd *cobd = &obd->u.cobd; int rc; - if (!list_empty(&dev->obd_exports)) + if (!list_empty(&obd->obd_exports)) return (-EBUSY); - - rc = obd_disconnect(cobd->cobd_cache_exp, flags); - if (rc 
!= 0) - CERROR ("error %d disconnecting cache\n", rc); - - rc = obd_disconnect(cobd->cobd_target_exp, flags); + if (cobd->cache_on) { + rc = obd_disconnect(cobd->cobd_cache_exp, flags); + if (rc != 0) + CERROR("error %d disconnecting cache\n", rc); + } + rc = obd_disconnect(cobd->cobd_real_exp, flags); if (rc != 0) - CERROR ("error %d disconnecting target\n", rc); + CERROR("error %d disconnecting real\n", rc); + + return (rc); +} - return (0); +struct obd_export *cobd_get_exp(struct obd_device *obd) +{ + struct cache_obd *cobd = &obd->u.cobd; + + if (cobd->cache_on) + return cobd->cobd_cache_exp; + else + return cobd->cobd_real_exp; } static int -cobd_connect (struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid) +cobd_connect(struct lustre_handle *conn, struct obd_device *obd, + struct obd_uuid *cluuid) { - int rc = class_connect (conn, obd, cluuid); - - CERROR ("rc %d\n", rc); - return (rc); + int rc; + rc = class_connect(conn, obd, cluuid); + return rc; } static int cobd_disconnect(struct obd_export *exp, int flags) { - int rc = class_disconnect(exp, flags); - - CERROR ("rc %d\n", rc); - return (rc); + int rc; + rc = class_disconnect(exp, 0); + return rc; } static int cobd_get_info(struct obd_export *exp, obd_count keylen, void *key, __u32 *vallen, void *val) { struct obd_device *obd = class_exp2obd(exp); - struct cache_obd *cobd; - + struct obd_export *cobd_exp; if (obd == NULL) { CERROR("invalid client cookie "LPX64"\n", exp->exp_handle.h_cookie); return -EINVAL; } + cobd_exp = cobd_get_exp(obd); + /* intercept cache utilisation info? 
*/ + + return obd_get_info(cobd_exp, keylen, key, vallen, val); +} - cobd = &obd->u.cobd; +static int cobd_set_info(struct obd_export *exp, obd_count keylen, + void *key, obd_count vallen, void *val) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); /* intercept cache utilisation info? */ - return obd_get_info(cobd->cobd_target_exp, keylen, key, vallen, val); + return obd_set_info(cobd_exp, keylen, key, vallen, val); } static int cobd_statfs(struct obd_device *obd, struct obd_statfs *osfs, unsigned long max_age) { - return obd_statfs(class_exp2obd(obd->u.cobd.cobd_target_exp), osfs, - max_age); + struct obd_export *cobd_exp; + + cobd_exp = cobd_get_exp(obd); + + return obd_statfs(class_exp2obd(cobd_exp), osfs, max_age); +} + +static int cobd_packmd(struct obd_export *exp, + struct lov_mds_md **disk_tgt, + struct lov_stripe_md *mem_src) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_packmd(cobd_exp, disk_tgt, mem_src); +} + +static int cobd_unpackmd(struct obd_export *exp, + struct lov_stripe_md **mem_tgt, + struct lov_mds_md *disk_src, + int disk_len) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_unpackmd(cobd_exp, mem_tgt, disk_src, disk_len); +} + +static int cobd_create(struct obd_export *exp, struct obdo *obdo, + struct lov_stripe_md **ea, + struct obd_trans_info *oti) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client 
cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_create(cobd_exp, obdo, ea, oti); +} + +static int cobd_destroy(struct obd_export *exp, struct obdo *obdo, + struct lov_stripe_md *ea, + struct obd_trans_info *oti) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_destroy(cobd_exp, obdo, ea, oti); +} + +static int cobd_precleanup(struct obd_device *obd, int flags) +{ + /*FIXME Do we need some cleanup here?*/ + return 0; } static int cobd_getattr(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm) { struct obd_device *obd = class_exp2obd(exp); - struct cache_obd *cobd; + struct obd_export *cobd_exp; if (obd == NULL) { CERROR("invalid client cookie "LPX64"\n", - exp->exp_handle.h_cookie); + exp->exp_handle.h_cookie); return -EINVAL; } + cobd_exp = cobd_get_exp(obd); + return obd_getattr(cobd_exp, oa, lsm); +} + +static int cobd_getattr_async(struct obd_export *exp, + struct obdo *obdo, struct lov_stripe_md *ea, + struct ptlrpc_request_set *set) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; - cobd = &obd->u.cobd; - return obd_getattr(cobd->cobd_target_exp, oa, lsm); + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_getattr_async(cobd_exp, obdo, ea, set); } -static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *nb, - struct niobuf_local *res, struct obd_trans_info *oti) +static int cobd_setattr(struct obd_export *exp, struct obdo *obdo, + struct lov_stripe_md *ea, + struct obd_trans_info *oti) { + struct obd_device *obd = class_exp2obd(exp); struct obd_export 
*cobd_exp; - int rc; - if (exp->exp_obd == NULL) + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_setattr(cobd_exp, obdo, ea, oti); +} - if ((cmd & OBD_BRW_WRITE) != 0) - return -EOPNOTSUPP; +static int cobd_md_getstatus(struct obd_export *exp, struct ll_fid *rootfid) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_getstatus(cobd_exp, rootfid); +} - cobd_exp = exp->exp_obd->u.cobd.cobd_target_exp; - rc = obd_preprw(cmd, cobd_exp, oa, objcount, obj, niocount, nb, res, - oti); +static int cobd_brw(int cmd, struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md *ea, obd_count oa_bufs, + struct brw_page *pg, struct obd_trans_info *oti) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; - return rc; + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_brw(cmd, cobd_exp, oa, ea, oa_bufs, pg, oti); } -static int cobd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_local *local, - struct obd_trans_info *oti, int rc) +static int cobd_brw_async(int cmd, struct obd_export *exp, + struct obdo *oa, struct lov_stripe_md *ea, + obd_count oa_bufs, struct brw_page *pg, + struct ptlrpc_request_set *set, + struct obd_trans_info *oti) { + struct obd_device *obd = class_exp2obd(exp); struct obd_export *cobd_exp; - if (exp->exp_obd == NULL) + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_brw_async(cmd, cobd_exp, oa, ea, oa_bufs, + pg, set, oti); +} - if 
((cmd & OBD_BRW_WRITE) != 0) - return -EOPNOTSUPP; +static int cobd_prep_async_page(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct page *page, obd_off offset, + struct obd_async_page_ops *ops, + void *data, void **res) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; - cobd_exp = exp->exp_obd->u.cobd.cobd_target_exp; - rc = obd_commitrw(cmd, cobd_exp, oa, objcount, obj, niocount, local, - oti, rc); - return rc; + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_prep_async_page(cobd_exp, lsm, loi, page, offset, + ops, data, res); } -static int cobd_brw(int cmd, struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *lsm, obd_count oa_bufs, - struct brw_page *pga, struct obd_trans_info *oti) +static int cobd_queue_async_io(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, void *cookie, + int cmd, obd_off off, int count, + obd_flag brw_flags, obd_flag async_flags) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_queue_async_io(cobd_exp, lsm, loi, cookie, cmd, off, count, + brw_flags, async_flags); +} + +static int cobd_set_async_flags(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, void *cookie, + obd_flag async_flags) { struct obd_device *obd = class_exp2obd(exp); - struct cache_obd *cobd; + struct obd_export *cobd_exp; if (obd == NULL) { - CERROR("invalid client cookie "LPX64"\n", + CERROR("invalid client cookie "LPX64"\n", exp->exp_handle.h_cookie); return -EINVAL; } + cobd_exp = cobd_get_exp(obd); + return obd_set_async_flags(cobd_exp, lsm, loi, cookie, async_flags); +} - if ((cmd & OBD_BRW_WRITE) != 0) - return -EOPNOTSUPP; 
+static int cobd_queue_group_io(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig, + void *cookie, int cmd, obd_off off, + int count, obd_flag brw_flags, + obd_flag async_flags) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; - cobd = &obd->u.cobd; - return obd_brw(cmd, cobd->cobd_target_exp, oa, lsm, oa_bufs, pga, oti); + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_queue_group_io(cobd_exp, lsm, loi, oig, cookie, + cmd, off, count, brw_flags, async_flags); } -static int cobd_iocontrol(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void *uarg) +static int cobd_trigger_group_io(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_trigger_group_io(cobd_exp, lsm, loi, oig); +} + +static int cobd_teardown_async_page(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, void *cookie) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_teardown_async_page(cobd_exp, lsm, loi, cookie); +} + +static int cobd_punch(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md *ea, obd_size start, + obd_size end, struct obd_trans_info *oti) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = 
cobd_get_exp(obd); + return obd_punch(cobd_exp, oa, ea, start, end, oti); +} + +static int cobd_sync(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md *ea, obd_size start, + obd_size end) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_sync(cobd_exp, oa, ea, start, end); +} + +static int cobd_enqueue(struct obd_export *exp, struct lov_stripe_md *ea, + __u32 type, ldlm_policy_data_t *policy, + __u32 mode, int *flags, void *bl_cb, void *cp_cb, + void *gl_cb, void *data, __u32 lvb_len, + void *lvb_swabber, struct lustre_handle *lockh) { struct obd_device *obd = class_exp2obd(exp); - struct cache_obd *cobd; + struct obd_export *cobd_exp; if (obd == NULL) { CERROR("invalid client cookie "LPX64"\n", exp->exp_handle.h_cookie); return -EINVAL; } + cobd_exp = cobd_get_exp(obd); + return obd_enqueue(cobd_exp, ea, type, policy, mode, flags, + bl_cb, cp_cb, gl_cb, data, lvb_len, + lvb_swabber, lockh); +} + +static int cobd_match(struct obd_export *exp, struct lov_stripe_md *ea, + __u32 type, ldlm_policy_data_t *policy, __u32 mode, + int *flags, void *data, struct lustre_handle *lockh) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; - /* intercept? 
*/ + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_match(cobd_exp, ea, type, policy, mode, flags, data, + lockh); +} +static int cobd_change_cbdata(struct obd_export *exp, + struct lov_stripe_md *lsm, + ldlm_iterator_t it, void *data) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; - cobd = &obd->u.cobd; - return obd_iocontrol(cmd, cobd->cobd_target_exp, len, karg, uarg); + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_change_cbdata(cobd_exp, lsm, it, data); } -static struct obd_ops cobd_ops = { - o_owner: THIS_MODULE, - o_attach: cobd_attach, - o_detach: cobd_detach, +static int cobd_cancel(struct obd_export *exp, + struct lov_stripe_md *ea, __u32 mode, + struct lustre_handle *lockh) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; - o_setup: cobd_setup, - o_cleanup: cobd_cleanup, + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_cancel(cobd_exp, ea, mode, lockh); +} - o_connect: cobd_connect, - o_disconnect: cobd_disconnect, +static int cobd_cancel_unused(struct obd_export *exp, + struct lov_stripe_md *ea, int flags, + void *opaque) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; - o_get_info: cobd_get_info, - o_statfs: cobd_statfs, + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_cancel_unused(cobd_exp, ea, flags, opaque); +} + +static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *oa, + int objcount, struct obd_ioobj *obj, + int niocount, struct niobuf_remote *nb, + struct niobuf_local *res, struct 
obd_trans_info *oti) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_preprw(cmd, cobd_exp, oa, objcount, obj, niocount, nb, + res, oti); +} + +static int cobd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa, + int objcount, struct obd_ioobj *obj, + int niocount, struct niobuf_local *local, + struct obd_trans_info *oti, int rc) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return obd_commitrw(cmd, cobd_exp, oa, objcount, obj, niocount, + local, oti, rc); +} + +static int cobd_flush(struct obd_device *obd) +{ + /*FLUSH the filesystem from the cache + *to the real device */ + return 0; +} + +static int cobd_iocontrol(unsigned int cmd, struct obd_export *exp, int len, + void *karg, void *uarg) +{ + struct obd_device *obd = class_exp2obd(exp); + struct cache_obd *cobd = &obd->u.cobd; + struct obd_device *real_dev = class_exp2obd(cobd->cobd_real_exp); + struct obd_device *cache_dev = class_exp2obd(cobd->cobd_cache_exp); + struct obd_export *cobd_exp; + int rc = 0; + + switch (cmd) { + case OBD_IOC_COBD_CON: + if (!cobd->cache_on) { + cobd->cache_on = 1; + /*FIXME should connect the cache obd again*/ + } + break; + case OBD_IOC_COBD_COFF: + if (cobd->cache_on) { + /*Here disconnect for cancel unused ldlm resources, + *then do flush, otherwise, there will be some problems + *in flush cache + *is is right? 
FIXME later*/ + rc = obd_disconnect(cobd->cobd_cache_exp, 0); + if (rc != 0) + CERROR("error %d disconnecting real\n", rc); + + cobd->cache_on = 0; + /*FIXME, should read from real_dev*/ + real_dev->u.cli.cl_max_mds_easize = + cache_dev->u.cli.cl_max_mds_easize; + real_dev->u.cli.cl_max_mds_cookiesize = + cache_dev->u.cli.cl_max_mds_cookiesize; + break; + } + case OBD_IOC_COBD_CFLUSH: + if (cobd->cache_on) { + cobd->cache_on = 0; + cobd_flush(obd); + break; + } + default: + cobd_exp = cobd_get_exp(obd); + rc = obd_iocontrol(cmd, cobd_exp, len, karg, uarg); + } + + return rc; +} + +static int cobd_llog_init(struct obd_device *obd, struct obd_llogs *llogs, + struct obd_device *disk_obd, int count, + struct llog_catid *logid) +{ + struct obd_export *cobd_exp; + struct obd_device *cobd_obd; + + cobd_exp = cobd_get_exp(obd); + cobd_obd = class_exp2obd(cobd_exp); + + return obd_llog_init(cobd_obd, &cobd_obd->obd_llogs, + disk_obd, count, logid); +} + +static int cobd_llog_finish(struct obd_device *obd, struct obd_llogs *llogs, + int count) +{ + struct obd_export *cobd_exp; + struct obd_device *cobd_obd; + + cobd_exp = cobd_get_exp(obd); + cobd_obd = class_exp2obd(cobd_exp); + + return obd_llog_finish(cobd_obd, &cobd_obd->obd_llogs, count); +} + +static int cobd_notify(struct obd_device *obd, + struct obd_device *watched, + int active) +{ + struct obd_export *cobd_exp; + + cobd_exp = cobd_get_exp(obd); + + return obd_notify(class_exp2obd(cobd_exp), watched, active); +} + +static int cobd_pin(struct obd_export *exp, obd_id ino, __u32 gen, + int type, struct obd_client_handle *handle, int flag) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + + return obd_pin(cobd_exp, ino, gen, type, handle, flag); +} + +static int cobd_unpin(struct obd_export *exp, + struct obd_client_handle *handle, int 
flag) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + + return obd_unpin(cobd_exp, handle, flag); +} + +static int cobd_init_ea_size(struct obd_export *exp, int easize, int cookiesize) +{ + struct obd_export *cobd_exp; + + cobd_exp = cobd_get_exp(exp->exp_obd); + return obd_init_ea_size(cobd_exp, easize, cookiesize); +} + +static int cobd_import_event(struct obd_device *obd, + struct obd_import *imp, + enum obd_import_event event) +{ + struct obd_export *cobd_exp; + + cobd_exp = cobd_get_exp(obd); + + obd_import_event(class_exp2obd(cobd_exp), imp, event); + + return 0; +} + +static int cobd_md_getattr(struct obd_export *exp, struct ll_fid *fid, + unsigned long valid, unsigned int ea_size, + struct ptlrpc_request **request) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_getattr(cobd_exp, fid, valid, ea_size, request); +} + +static int cobd_md_req2lustre_md (struct obd_export *mdc_exp, + struct ptlrpc_request *req, unsigned int offset, + struct obd_export *osc_exp, struct lustre_md *md) +{ + struct obd_device *obd = class_exp2obd(mdc_exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + mdc_exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_req2lustre_md(cobd_exp, req, offset, osc_exp, md); +} + +static int cobd_md_change_cbdata(struct obd_export *exp, struct ll_fid *fid, + ldlm_iterator_t it, void *data) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } 
+ cobd_exp = cobd_get_exp(obd); + return md_change_cbdata(cobd_exp, fid, it, data); +} + +static int cobd_md_getattr_name(struct obd_export *exp, struct ll_fid *fid, + char *filename, int namelen, + unsigned long valid, + unsigned int ea_size, + struct ptlrpc_request **request) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_getattr_name(cobd_exp, fid, filename, namelen, valid, + ea_size, request); +} + +static int cobd_md_create(struct obd_export *exp, struct mdc_op_data *op_data, + const void *data, int datalen, int mode, + __u32 uid, __u32 gid, __u64 rdev, + struct ptlrpc_request **request) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_create(cobd_exp, op_data, data, datalen, mode, + uid, gid, rdev, request); +} + +static int cobd_md_unlink(struct obd_export *exp, struct mdc_op_data *data, + struct ptlrpc_request **request) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_unlink(cobd_exp, data, request); +} + +static int cobd_md_valid_attrs(struct obd_export *exp, struct ll_fid *fid) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_valid_attrs(cobd_exp, fid); +} + +static int cobd_md_rename(struct obd_export *exp, struct mdc_op_data *data, + const char *old, int oldlen, const char *new, + int 
newlen, struct ptlrpc_request **request) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_rename(cobd_exp, data, old, oldlen, new, newlen, request); +} + +static int cobd_md_link(struct obd_export *exp, struct mdc_op_data *data, + struct ptlrpc_request **request) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_link(cobd_exp, data, request); +} + +static int cobd_md_setattr(struct obd_export *exp, struct mdc_op_data *data, + struct iattr *iattr, void *ea, int ealen, void *ea2, + int ea2len, struct ptlrpc_request **request) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_setattr(cobd_exp, data, iattr, ea, ealen, ea2, ea2len, request); +} + +static int cobd_md_readpage(struct obd_export *exp, struct ll_fid *mdc_fid, + __u64 offset, struct page *page, + struct ptlrpc_request **request) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_readpage(cobd_exp, mdc_fid, offset, page, request); +} + +static int cobd_md_close(struct obd_export *exp, struct obdo *obdo, + struct obd_client_handle *och, + struct ptlrpc_request **request) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return 
-EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_close(cobd_exp, obdo, och, request); +} + +static int cobd_md_done_writing(struct obd_export *exp, struct obdo *obdo) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_done_writing(cobd_exp, obdo); +} + +static int cobd_md_sync(struct obd_export *exp, struct ll_fid *fid, + struct ptlrpc_request **request) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + + return md_sync(cobd_exp, fid, request); +} + +static int cobd_md_set_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och, + struct ptlrpc_request *open_req) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + + return md_set_open_replay_data(cobd_exp, och, open_req); +} + +static int cobd_md_clear_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + + return md_clear_open_replay_data(cobd_exp, och); +} + +static int cobd_md_store_inode_generation(struct obd_export *exp, + struct ptlrpc_request *req, + int reqoff, int repoff) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = 
cobd_get_exp(obd); + + return md_store_inode_generation(cobd_exp, req, reqoff, repoff); +} + +static int cobd_md_set_lock_data(struct obd_export *exp, __u64 *l, void *data) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + + return md_set_lock_data(cobd_exp, l, data); +} + +static int cobd_md_enqueue(struct obd_export *exp, int lock_type, + struct lookup_intent *it, int lock_mode, + struct mdc_op_data *data, struct lustre_handle *lockh, + void *lmm, int lmmsize, + ldlm_completion_callback cb_completion, + ldlm_blocking_callback cb_blocking, void *cb_data) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_enqueue(cobd_exp, lock_type, it, lock_mode, data, + lockh, lmm, lmmsize, cb_completion, cb_blocking, + cb_data); +} + +static int cobd_md_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt, + struct ll_fid *pfid, const char *name, int len, + void *lmm, int lmmsize, + struct ll_fid *cfid, struct lookup_intent *it, + int lookup_flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_intent_lock(cobd_exp, uctxt, pfid, name, len, lmm, lmmsize, + cfid, it, lookup_flags, reqp, cb_blocking); +} + +static struct obd_device * cobd_md_get_real_obd(struct obd_export *exp, + char *name, int len) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + 
exp->exp_handle.h_cookie); + return NULL; + } + cobd_exp = cobd_get_exp(obd); + return md_get_real_obd(cobd_exp, name, len); +} + +static int cobd_md_change_cbdata_name(struct obd_export *exp, + struct ll_fid *fid, char *name, + int namelen, struct ll_fid *fid2, + ldlm_iterator_t it, void *data) +{ + struct obd_device *obd = class_exp2obd(exp); + struct obd_export *cobd_exp; + + if (obd == NULL) { + CERROR("invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + return -EINVAL; + } + cobd_exp = cobd_get_exp(obd); + return md_change_cbdata_name(cobd_exp, fid, name, namelen, fid2, it, + data); +} +static struct obd_ops cobd_obd_ops = { + .o_owner = THIS_MODULE, + .o_attach = cobd_attach, + .o_detach = cobd_detach, + .o_setup = cobd_setup, + .o_cleanup = cobd_cleanup, + .o_connect = cobd_connect, + .o_disconnect = cobd_disconnect, + .o_set_info = cobd_set_info, + .o_get_info = cobd_get_info, + .o_statfs = cobd_statfs, + + .o_packmd = cobd_packmd, + .o_unpackmd = cobd_unpackmd, + .o_create = cobd_create, + .o_destroy = cobd_destroy, + .o_precleanup = cobd_precleanup, + .o_getattr = cobd_getattr, + .o_getattr_async = cobd_getattr_async, + .o_setattr = cobd_setattr, + + .o_brw = cobd_brw, + .o_brw_async = cobd_brw_async, + .o_prep_async_page = cobd_prep_async_page, + .o_queue_async_io = cobd_queue_async_io, + .o_set_async_flags = cobd_set_async_flags, + .o_queue_group_io = cobd_queue_group_io, + .o_trigger_group_io = cobd_trigger_group_io, + .o_teardown_async_page = cobd_teardown_async_page, + .o_preprw = cobd_preprw, + .o_punch = cobd_punch, + .o_sync = cobd_sync, + .o_enqueue = cobd_enqueue, + .o_match = cobd_match, + .o_change_cbdata = cobd_change_cbdata, + .o_cancel = cobd_cancel, + .o_cancel_unused = cobd_cancel_unused, + .o_iocontrol = cobd_iocontrol, + .o_commitrw = cobd_commitrw, + .o_llog_init = cobd_llog_init, + .o_llog_finish = cobd_llog_finish, + .o_notify = cobd_notify, + .o_pin = cobd_pin, + .o_unpin = cobd_unpin, + .o_import_event = 
cobd_import_event, + .o_init_ea_size = cobd_init_ea_size, +}; - o_getattr: cobd_getattr, - o_preprw: cobd_preprw, - o_commitrw: cobd_commitrw, - o_brw: cobd_brw, - o_iocontrol: cobd_iocontrol, +struct md_ops cobd_md_ops = { + .m_getstatus = cobd_md_getstatus, + .m_getattr = cobd_md_getattr, + .m_req2lustre_md = cobd_md_req2lustre_md, + .m_change_cbdata = cobd_md_change_cbdata, + .m_getattr_name = cobd_md_getattr_name, + .m_create = cobd_md_create, + .m_unlink = cobd_md_unlink, + .m_valid_attrs = cobd_md_valid_attrs, + .m_rename = cobd_md_rename, + .m_link = cobd_md_link, + .m_setattr = cobd_md_setattr, + .m_readpage = cobd_md_readpage, + .m_close = cobd_md_close, + .m_done_writing = cobd_md_done_writing, + .m_sync = cobd_md_sync, + .m_set_open_replay_data = cobd_md_set_open_replay_data, + .m_clear_open_replay_data = cobd_md_clear_open_replay_data, + .m_store_inode_generation = cobd_md_store_inode_generation, + .m_set_lock_data = cobd_md_set_lock_data, + .m_enqueue = cobd_md_enqueue, + .m_get_real_obd = cobd_md_get_real_obd, + .m_intent_lock = cobd_md_intent_lock, + .m_change_cbdata_name = cobd_md_change_cbdata_name, }; static int __init cobd_init(void) @@ -271,8 +1215,8 @@ static int __init cobd_init(void) printk(KERN_INFO "Lustre: Caching OBD driver; info@clusterfs.com\n"); lprocfs_init_vars(cobd, &lvars); - RETURN(class_register_type(&cobd_ops, NULL, lvars.module_vars, - OBD_CACHE_DEVICENAME)); + RETURN(class_register_type(&cobd_obd_ops, &cobd_md_ops, + lvars.module_vars, OBD_CACHE_DEVICENAME)); } static void /*__exit*/ cobd_exit(void) diff --git a/lustre/cobd/lproc_cache.c b/lustre/cobd/lproc_cache.c index 8e698ce..25b1e3a 100644 --- a/lustre/cobd/lproc_cache.c +++ b/lustre/cobd/lproc_cache.c @@ -41,7 +41,7 @@ static int cobd_rd_target(char *page, char **start, off_t off, int count, rc = snprintf(page, count, "not set up\n"); } else { struct obd_device *tgt = - class_exp2obd(cobd->u.cobd.cobd_target_exp); + class_exp2obd(cobd->u.cobd.cobd_real_exp); LASSERT(tgt 
!= NULL); rc = snprintf(page, count, "%s\n", tgt->obd_uuid.uuid); } diff --git a/lustre/conf/Makefile.am b/lustre/conf/Makefile.am index a205d10..6e3666b 100644 --- a/lustre/conf/Makefile.am +++ b/lustre/conf/Makefile.am @@ -10,6 +10,3 @@ ldapconf_SCRIPTS = slapd-lustre.conf ldapschema_SCRIPTS = lustre.schema pkglibdir = '${exec_prefix}/usr/lib/$(PACKAGE)' pkglib_DATA = top.ldif lustre2ldif.xsl - -include $(top_srcdir)/Rules - diff --git a/lustre/conf/lustre.dtd b/lustre/conf/lustre.dtd index 96fdc8e..8fd57d8 100644 --- a/lustre/conf/lustre.dtd +++ b/lustre/conf/lustre.dtd @@ -21,12 +21,12 @@ + nettype (tcp | elan | gm) 'tcp'> - diff --git a/lustre/configure.in b/lustre/configure.in index bff0966..c57a7da 100644 --- a/lustre/configure.in +++ b/lustre/configure.in @@ -5,60 +5,156 @@ AC_INIT AC_CANONICAL_SYSTEM +AM_INIT_AUTOMAKE(lustre, HEAD) +# AM_MAINTAINER_MODE -# Automake variables. Steal the version number from lustre.spec.in. -AM_INIT_AUTOMAKE(lustre, builtin([esyscmd], [sed -ne '/^%define version /{ s/.*version //; p; q; }' scripts/lustre.spec.in])) -#AM_MAINTAINER_MODE +# Four main targets: lustre kernel modules, utilities, tests, and liblustre +AC_MSG_CHECKING([whether to build kernel modules]) +AC_ARG_ENABLE([modules], + AC_HELP_STRING([--disable-modules], + [disable building of Lustre kernel modules]), + [],[enable_modules='yes']) +AC_MSG_RESULT([$enable_modules]) +AM_CONDITIONAL(MODULES, test x$enable_modules = xyes) -# LLNL patches their ext3 and calls it extN -AC_ARG_ENABLE(extN, [ --enable-extN use extN instead of ext3 for lustre backend]) -AM_CONDITIONAL(EXTN, test x$enable_extN = xyes) +AC_MSG_CHECKING([whether to build Lustre library]) +AC_ARG_ENABLE([liblustre], + AC_HELP_STRING([--disable-liblustre], + [disable building of Lustre library]), + [],[enable_liblustre='yes']) +AC_MSG_RESULT([$enable_liblustre]) +AM_CONDITIONAL(LIBLUSTRE, test x$enable_liblustre = xyes) + +AC_MSG_CHECKING([whether to build utilities]) +AC_ARG_ENABLE([utils], + 
AC_HELP_STRING([--disable-utils], + [disable building of Lustre utility programs]), + [],[enable_utils='yes']) +AC_MSG_RESULT([$enable_utils]) + +AC_MSG_CHECKING([whether to build Lustre tests]) +AC_ARG_ENABLE([tests], + AC_HELP_STRING([--disable-tests], + [disable building of Lustre tests]), + [],[enable_tests='yes']) +AC_MSG_RESULT([$enable_tests]) # specify wether to build doc or not -AC_ARG_ENABLE(doc, [ --disable-doc skip creation of pdf documentation], - [ - if test x$enable_doc = xyes ; then - ENABLE_DOC=1 - else - ENABLE_DOC=0 - fi - ], - [ ENABLE_DOC=0 ]) +AC_MSG_CHECKING([whether to build docs]) +AC_ARG_ENABLE(doc, + AC_HELP_STRING([--disable-doc], + [skip creation of pdf documentation]), + [ + if test x$enable_doc = xyes ; then + ENABLE_DOC=1 + else + ENABLE_DOC=0 + fi + ],[ + ENABLE_DOC=0 + enable_doc='no' + ]) +AC_MSG_RESULT([$enable_doc]) AM_CONDITIONAL(DOC, test x$ENABLE_DOC = x1) AC_SUBST(ENABLE_DOC) +# default backing fs is ext3 +BACKINGFS='ext3' + +# LLNL patches their ext3 and calls it extN +AC_MSG_CHECKING([whether to use extN]) +AC_ARG_ENABLE([extN], + AC_HELP_STRING([--enable-extN], + [use extN instead of ext3 for lustre backend]), + [if test x$enable_extN = xyes ; then BACKINGFS='extN' ; fi],[enable_extN='no']) +AC_MSG_RESULT([$enable_extN]) +AM_CONDITIONAL(EXTN, test x$enable_extN = xyes) + +# SuSE gets ldiskfs +AC_MSG_CHECKING([whether to enable ldiskfs]) +AC_ARG_ENABLE([ldiskfs], + AC_HELP_STRING([--enable-ldiskfs], + [use ldiskfs for the Lustre backing FS]), + [if test x$enable_ldiskfs = xyes ; then BACKINGFS='ldiskfs' ; fi],[enable_ldiskfs='no']) +AC_MSG_RESULT([$enable_ldiskfs]) +AM_CONDITIONAL(LDISKFS, test x$enable_ldiskfs = xyes) + +AC_MSG_CHECKING([which backing filesystem to use]) +AC_MSG_RESULT([$BACKINGFS]) +AC_SUBST(BACKINGFS) + # the pinger is temporary, until we have the recovery node in place -AC_ARG_ENABLE(pinger, [ --disable-pinger disable recovery pinger support]) +AC_MSG_CHECKING([whether to enable pinger support]) +AC_ARG_ENABLE([pinger], + AC_HELP_STRING([--disable-pinger], + [disable recovery pinger
support]), + [],[enable_pinger='yes']) +AC_MSG_RESULT([$enable_pinger]) if test x$enable_pinger != xno ; then AC_DEFINE(ENABLE_PINGER, 1, Use the Pinger) fi -AC_ARG_WITH(obd-buffer-size, [ --with-obd-buffer-size=[size] set lctl ioctl maximum bytes (default=8192)],OBD_BUFFER_SIZE=$with_obd_buffer_size,OBD_BUFFER_SIZE=8192) +AC_MSG_CHECKING([maximum OBD ioctl size]) +AC_ARG_WITH([obd-buffer-size], + AC_HELP_STRING([--with-obd-buffer-size=[size]], + [set lctl ioctl maximum bytes (default=8192)]), + [ + OBD_BUFFER_SIZE=$with_obd_buffer_size + ],[ + OBD_BUFFER_SIZE=8192 + ]) +AC_MSG_RESULT([$OBD_BUFFER_SIZE bytes]) AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size]) # specify location of libsysio tree -AC_ARG_WITH(sysio, [ --with-sysio=[path] set path to libsysio source (default=../libsysio)], sysiodir=$withval) -if test x$sysiodir = x; then - SYSIO='$(top_srcdir)/../libsysio' -else - SYSIO=$sysiodir -fi +AC_MSG_CHECKING([location of libsysio]) +AC_ARG_WITH([sysio], + AC_HELP_STRING([--with-sysio=[path]], + [set path to libsysio source (default=../libsysio)]), + [ + SYSIO=$with_sysio + SYSIO_PATH=$SYSIO + ],[ + SYSIO='$(top_srcdir)/../libsysio' + SYSIO_PATH='../libsysio' + ]) +AC_MSG_RESULT([$SYSIO_PATH]) + +AC_CHECK_FILE([$SYSIO_PATH/src/libsysio.a],[], + [ + if test x$enable_liblustre = xyes ; then + AC_MSG_ERROR([A built libsysio tree is required for building liblustre.]) + fi + ]) AC_SUBST(SYSIO) #build mpi-tests -AC_ARG_ENABLE(mpitests, [ --enable-mpitests build liblustre mpi tests]) +AC_MSG_CHECKING([whether to build mpitests]) +AC_ARG_ENABLE([mpitests], + AC_HELP_STRING([--enable-mpitests], + [build liblustre mpi tests]), + [],[enable_mpitests=no]) +AC_MSG_RESULT([$enable_mpitests]) AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests) -# snap compilation -AC_ARG_ENABLE(snapfs, [ --enable-snapfs build snapfs]) +# snap compilation +AC_MSG_CHECKING([whether to enable snapfs support]) +AC_ARG_ENABLE([snapfs], + 
AC_HELP_STRING([--enable-snapfs], + [build snapfs]), + [],[enable_snapfs='no']) +AC_MSG_RESULT([$enable_snapfs]) AM_CONDITIONAL(SNAPFS, test x$enable_snapfs = xyes) -# smfs compilation -AC_ARG_ENABLE(smfs, [ --enable-smfs build smfs]) +# smfs compilation +AC_MSG_CHECKING([whether to enable smfs support]) +AC_ARG_ENABLE([smfs], + AC_HELP_STRING([--enable-smfs], + [build smfs]), + [],[enable_smfs='no']) +AC_MSG_RESULT([$enable_smfs]) AM_CONDITIONAL(SMFS, test x$enable_smfs = xyes) - - sinclude(portals/build.m4) sinclude(portals/archdep.m4) @@ -67,20 +163,97 @@ if test x$enable_inkernel = xyes ; then sh -e -x -c '(cp -f $0.mk $0.in)' fi +# we need to pass a full path here for kernel makefiles +INCLUDE_RULES="include $PWD/Rules" +AC_SUBST(INCLUDE_RULES) + +# ptlrpc kernel build requires this +LUSTRE="$PWD" +AC_SUBST(LUSTRE) + +# +AM_CONDITIONAL(UTILS, test x$enable_utils = xyes) +AM_CONDITIONAL(TESTS, test x$enable_tests = xyes) + AM_CONFIG_HEADER(include/config.h) -AC_OUTPUT([Makefile lvfs/Makefile portals/Makefile portals/Kernelenv \ - portals/libcfs/Makefile portals/portals/Makefile \ - portals/unals/Makefile portals/knals/Makefile \ - portals/router/Makefile portals/knals/socknal/Makefile \ - portals/knals/gmnal/Makefile portals/knals/qswnal/Makefile \ - portals/knals/scimacnal/Makefile \ - portals/knals/ibnal/Makefile \ - portals/utils/Makefile portals/tests/Makefile portals/doc/Makefile \ - obdecho/Makefile ptlrpc/Makefile liblustre/Makefile liblustre/tests/Makefile \ - lov/Makefile osc/Makefile mdc/Makefile mds/Makefile ost/Makefile \ - cobd/Makefile ptlbd/Makefile conf/Makefile tests/Makefile \ - utils/Makefile utils/Lustre/Makefile obdfilter/Makefile lmv/Makefile \ - obdclass/Makefile smfs/Makefile snapfs/Makefile snapfs/utils/Makefile \ - include/Makefile include/linux/Makefile llite/Makefile doc/Makefile scripts/Makefile \ - scripts/lustre.spec]) +AC_OUTPUT([ +Makefile +Rules +autoMakefile +cobd/Makefile +cobd/autoMakefile +conf/Makefile +doc/Makefile 
+include/Makefile +include/linux/Makefile +include/lustre/Makefile +ldiskfs/Makefile +ldiskfs/autoMakefile +ldlm/Makefile +liblustre/Makefile +liblustre/tests/Makefile +llite/Makefile +llite/autoMakefile +lov/Makefile +lov/autoMakefile +lmv/Makefile +lmv/autoMakefile +lvfs/Makefile +lvfs/autoMakefile +mdc/Makefile +mdc/autoMakefile +mds/Makefile +mds/autoMakefile +obdclass/Makefile +obdclass/autoMakefile +obdecho/Makefile +obdecho/autoMakefile +obdfilter/Makefile +obdfilter/autoMakefile +osc/Makefile +osc/autoMakefile +ost/Makefile +ost/autoMakefile +portals/Kernelenv +portals/Makefile +portals/autoMakefile +portals/doc/Makefile +portals/knals/Makefile +portals/knals/autoMakefile +portals/knals/gmnal/Makefile +portals/knals/gmnal/autoMakefile +portals/knals/ibnal/Makefile +portals/knals/ibnal/autoMakefile +portals/knals/qswnal/Makefile +portals/knals/qswnal/autoMakefile +portals/knals/socknal/Makefile +portals/knals/socknal/autoMakefile +portals/libcfs/Makefile +portals/libcfs/autoMakefile +portals/portals/Makefile +portals/portals/autoMakefile +portals/router/Makefile +portals/router/autoMakefile +portals/tests/Makefile +portals/tests/autoMakefile +portals/unals/Makefile +portals/utils/Makefile +ptlbd/Makefile +ptlbd/autoMakefile +ptlrpc/Makefile +ptlrpc/autoMakefile +scripts/Makefile +scripts/lustre.spec +scripts/version_tag.pl +smfs/Makefile +smfs/autoMakefile +snapfs/Makefile +snapfs/autoMakefile +snapfs/utils/Makefile +cmobd/Makefile +cmobd/autoMakefile +tests/Makefile +utils/Lustre/Makefile +utils/Makefile +]) diff --git a/lustre/doc/Makefile.am b/lustre/doc/Makefile.am index dbffef4..a47d5e7 100644 --- a/lustre/doc/Makefile.am +++ b/lustre/doc/Makefile.am @@ -122,5 +122,3 @@ doc.old/lustre.lin: dist-hook: rm -rf $(distdir)/figs/CVS - -include $(top_srcdir)/Rules diff --git a/lustre/doc/lmc.1 b/lustre/doc/lmc.1 index c3345e8..28f51ad 100644 --- a/lustre/doc/lmc.1 +++ b/lustre/doc/lmc.1 @@ -59,7 +59,7 @@ The arguments required are: This will create a new node 
with the given name if not already present. This is also used to specify a specific node for other elements. .TP --nettype -This can be tcp, elan, gm, scimac. +This can be tcp, elan, or gm. .TP --nid nid The network id, e.g. ElanID or IP address as used by Portals. If nid is '*', then the local address of the interface with specified nettype is will be substituted when the node is configured with lconf. An nid of '*' should be used only for the generic client configuration. diff --git a/lustre/doc/lmc.lyx b/lustre/doc/lmc.lyx index fb14d0e..bb6ca2b 100644 --- a/lustre/doc/lmc.lyx +++ b/lustre/doc/lmc.lyx @@ -197,7 +197,7 @@ The arguments required are: --nettype\SpecialChar ~ This can be \series bold -tcp, elan, gm, scimac. +tcp, elan, gm. \layout Description --nid\SpecialChar ~ diff --git a/lustre/include/.cvsignore b/lustre/include/.cvsignore index 7b78c04..a8dd680 100644 --- a/lustre/include/.cvsignore +++ b/lustre/include/.cvsignore @@ -3,6 +3,7 @@ config.log config.status configure config.h +config.h.in stamp-h stamp-h1 stamp-h.in diff --git a/lustre/include/Makefile.am b/lustre/include/Makefile.am index d532ab5..2a3f201 100644 --- a/lustre/include/Makefile.am +++ b/lustre/include/Makefile.am @@ -4,6 +4,6 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution -SUBDIRS = linux +SUBDIRS = linux lustre EXTRA_DIST = config.h.in ioctl.h liblustre.h -include $(top_srcdir)/Rules + diff --git a/lustre/include/config.h.in b/lustre/include/config.h.in deleted file mode 100644 index eca8fdd..0000000 --- a/lustre/include/config.h.in +++ /dev/null @@ -1,61 +0,0 @@ -/* include/config.h.in. Generated from configure.in by autoheader. */ - -/* Use the Pinger */ -#undef ENABLE_PINGER - -/* Define to 1 if you have the header file. */ -#undef HAVE_INTTYPES_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_MEMORY_H - -/* Define to 1 if you have the header file. 
*/ -#undef HAVE_STDINT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDLIB_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRINGS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRING_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_STAT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_TYPES_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_UNISTD_H - -/* IOCTL Buffer Size */ -#undef OBD_MAX_IOCTL_BUFFER - -/* Name of package */ -#undef PACKAGE - -/* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the version of this package. */ -#undef PACKAGE_VERSION - -/* The size of a `unsigned long long', as computed by sizeof. */ -#undef SIZEOF_UNSIGNED_LONG_LONG - -/* Define to 1 if you have the ANSI C header files. 
*/ -#undef STDC_HEADERS - -/* Version number of package */ -#undef VERSION diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index 460d86e..da6cc8a 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -24,8 +24,8 @@ #ifndef LIBLUSTRE_H__ #define LIBLUSTRE_H__ -#include #include +#include #ifndef __CYGWIN__ #include #include @@ -52,7 +52,7 @@ #define PAGE_SHIFT 12 #define PAGE_SIZE (1UL << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -#define loff_t __u64 +#define loff_t long long #define ERESTART 2001 typedef unsigned short umode_t; @@ -116,9 +116,6 @@ static inline void *kmalloc(int size, int prot) #define PTR_ERR(a) ((long)(a)) #define ERR_PTR(a) ((void*)((long)(a))) -#define capable(foo) 1 -#define CAP_SYS_ADMIN 1 - typedef struct { void *cwd; }mm_segment_t; @@ -142,7 +139,7 @@ typedef int (write_proc_t)(struct file *file, const char *buffer, ((unsigned char *)&addr)[1], \ ((unsigned char *)&addr)[2], \ ((unsigned char *)&addr)[3] - + #if defined(__LITTLE_ENDIAN) #define HIPQUAD(addr) \ ((unsigned char *)&addr)[3], \ @@ -240,8 +237,18 @@ static inline int request_module(char *name) #define __MOD_DEC_USE_COUNT(m) do {} while (0) #define MOD_INC_USE_COUNT do {} while (0) #define MOD_DEC_USE_COUNT do {} while (0) -#define try_module_get __MOD_INC_USE_COUNT -#define module_put __MOD_DEC_USE_COUNT +static inline void __module_get(struct module *module) +{ +} + +static inline int try_module_get(struct module *module) +{ + return 1; +} + +static inline void module_put(struct module *module) +{ +} /* module initialization */ extern int init_obdclass(void); @@ -255,14 +262,13 @@ extern int echo_client_init(void); /* general stuff */ -#define jiffies 0 #define EXPORT_SYMBOL(S) -typedef int spinlock_t; +typedef struct { } spinlock_t; typedef __u64 kdev_t; -#define SPIN_LOCK_UNLOCKED 0 +#define SPIN_LOCK_UNLOCKED (spinlock_t) { } #define LASSERT_SPIN_LOCKED(lock) do {} while(0) static inline void spin_lock(spinlock_t *l) {return;} 
@@ -296,14 +302,7 @@ static inline void spin_unlock_irqrestore(spinlock_t *a, unsigned long b) {} /* random */ -static inline void get_random_bytes(void *ptr, int size) -{ - int *p = (int *)ptr; - int i, count = size/sizeof(int); - - for (i = 0; i< count; i++) - *p++ = rand(); -} +void get_random_bytes(void *ptr, int size); /* memory */ @@ -357,11 +356,6 @@ static inline int kmem_cache_destroy(kmem_cache_t *a) #define PAGE_CACHE_SHIFT 12 #define PAGE_CACHE_MASK PAGE_MASK -/* XXX - * for this moment, liblusre will not rely OST for non-page-aligned write - */ -#define LIBLUSTRE_HANDLE_UNALIGNED_PAGE - struct page { void *addr; unsigned long index; @@ -371,11 +365,12 @@ struct page { /* internally used by liblustre file i/o */ int _offset; int _count; -#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE - int _managed; -#endif }; +/* 2.4 defines */ +#define PAGE_LIST_ENTRY list +#define PAGE_LIST(page) ((page)->list) + #define kmap(page) (page)->addr #define kunmap(a) do {} while (0) @@ -523,14 +518,15 @@ struct semaphore { int count; }; -#define down(a) do {} while (0) -#define up(a) do {} while (0) -#define down_read(a) do {} while (0) -#define up_read(a) do {} while (0) -#define down_write(a) do {} while (0) -#define up_write(a) do {} while (0) -#define sema_init(a,b) do {} while (0) -#define init_rwsem(a) do {} while (0) +/* use the macro's argument to avoid unused warnings */ +#define down(a) do { (void)a; } while (0) +#define up(a) do { (void)a; } while (0) +#define down_read(a) do { (void)a; } while (0) +#define up_read(a) do { (void)a; } while (0) +#define down_write(a) do { (void)a; } while (0) +#define up_write(a) do { (void)a; } while (0) +#define sema_init(a,b) do { (void)a; } while (0) +#define init_rwsem(a) do { (void)a; } while (0) #define DECLARE_MUTEX(name) \ struct semaphore name = { 1 } static inline void init_MUTEX (struct semaphore *sem) @@ -564,12 +560,23 @@ struct task_struct { int pid; int fsuid; int fsgid; + int max_groups; + int ngroups; + gid_t *groups; 
__u32 cap_effective; + + struct fs_struct __fs; }; extern struct task_struct *current; - -#define in_group_p(a) 0 /* FIXME */ +int in_group_p(gid_t gid); +static inline int capable(int cap) +{ + if (current->cap_effective & (1 << cap)) + return 1; + else + return 0; +} #define set_current_state(foo) do { current->state = foo; } while (0) @@ -618,6 +625,20 @@ static inline int schedule_timeout(signed long t) #define SIGNAL_MASK_ASSERT() #define KERN_INFO +#include +#if HZ != 1 +#error "liblustre's jiffies currently expects HZ to be 1" +#endif +#define jiffies \ +({ \ + unsigned long _ret = 0; \ + struct timeval tv; \ + if (gettimeofday(&tv, NULL) == 0) \ + _ret = tv.tv_sec; \ + _ret; \ +}) +#define time_after(a, b) ((long)(b) - (long)(a) < 0) /* true when a is later than b */ +#define time_before(a, b) time_after(b,a) /* true when a is earlier than b */ struct timer_list { struct list_head tl_list; @@ -650,11 +671,6 @@ static inline void del_timer(struct timer_list *l) free(l); } -#define time_after(a, b) \ -({ \ - 1; \ -}) - typedef struct { volatile int counter; } atomic_t; #define atomic_read(a) ((a)->counter) @@ -672,6 +688,33 @@ typedef struct { volatile int counter; } atomic_t; #define unlikely(exp) (exp) #endif +/* FIXME sys/capability will finally included linux/fs.h thus + * cause numerous trouble on x86-64.
as temporary solution for + * build broken at cary, we copy definition we need from capability.h + * FIXME + */ +struct _cap_struct; +typedef struct _cap_struct *cap_t; +typedef int cap_value_t; +typedef enum { + CAP_EFFECTIVE=0, + CAP_PERMITTED=1, + CAP_INHERITABLE=2 +} cap_flag_t; +typedef enum { + CAP_CLEAR=0, + CAP_SET=1 +} cap_flag_value_t; + +#define CAP_FOWNER 3 +#define CAP_FSETID 4 +#define CAP_SYS_ADMIN 21 + +cap_t cap_get_proc(void); +int cap_get_flag(cap_t, cap_value_t, cap_flag_t, cap_flag_value_t *); + + + /* log related */ static inline int llog_init_commit_master(void) { return 0; } static inline int llog_cleanup_commit_master(int force) { return 0; } diff --git a/lustre/include/linux/Makefile.am b/lustre/include/linux/Makefile.am index cd614f9..b170c5c 100644 --- a/lustre/include/linux/Makefile.am +++ b/lustre/include/linux/Makefile.am @@ -3,12 +3,9 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution - -pkginclude_HEADERS = lustre_user.h - EXTRA_DIST = lprocfs_status.h lustre_debug.h lustre_ha.h lustre_lib.h \ lustre_mgmt.h obd_cache.h obd_lov.h lustre_dlm.h lustre_handles.h \ lustre_net.h obd_class.h obd_ost.h obd_support.h lustre_commit_confd.h \ lustre_export.h lustre_log.h obd_echo.h obd_ptlbd.h obd_trace.h \ lustre_compat25.h lustre_fsfilt.h lustre_import.h lustre_mds.h obd.h \ - lvfs.h lvfs_linux.h lustre_cfg.h lustre_lite.h lustre_idl.h + lvfs.h lvfs_linux.h lustre_cfg.h lustre_lite.h lustre_idl.h lustre_smfs.h diff --git a/lustre/include/linux/lprocfs_status.h b/lustre/include/linux/lprocfs_status.h index 3a7ac42..2712136 100644 --- a/lustre/include/linux/lprocfs_status.h +++ b/lustre/include/linux/lprocfs_status.h @@ -205,6 +205,7 @@ do { \ extern void lprocfs_##NAME##_init_vars(struct lprocfs_static_vars *); \ lprocfs_##NAME##_init_vars(VAR); \ } while (0) + extern void lprocfs_init_multi_vars(unsigned int idx, struct lprocfs_static_vars *var); /* lprocfs_status.c */ @@ -222,8 +223,8 
@@ extern void lprocfs_remove(struct proc_dir_entry *root); extern struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *root, const char *name); -extern int lprocfs_obd_attach(struct obd_device *dev, struct lprocfs_vars *list); -extern int lprocfs_obd_detach(struct obd_device *dev); +extern int lprocfs_obd_attach(struct obd_device *obd, struct lprocfs_vars *list); +extern int lprocfs_obd_detach(struct obd_device *obd); /* Generic callbacks */ @@ -239,6 +240,8 @@ extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count, int *eof, void *data); extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count, int *eof, void *data); +extern int lprocfs_rd_num_exports(char *page, char **start, off_t off, + int count, int *eof, void *data); extern int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count, int *eof, void *data); @@ -325,9 +328,14 @@ static inline int lprocfs_rd_uuid(char *page, char **start, off_t off, static inline int lprocfs_rd_name(char *page, char **start, off_t off, int count, int *eof, void *data) { return 0; } static inline int lprocfs_rd_server_uuid(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } + int count, int *eof, void *data) +{ return 0; } static inline int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } + int count, int *eof, void *data) +{ return 0; } +static inline int lprocfs_rd_num_exports(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ return 0; } static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count, int *eof, void *data) { return 0; } diff --git a/lustre/include/linux/lustre_cfg.h b/lustre/include/linux/lustre_cfg.h index 4f230d2..c426cfc 100644 --- a/lustre/include/linux/lustre_cfg.h +++ b/lustre/include/linux/lustre_cfg.h @@ -58,9 +58,13 @@ struct lustre_cfg { char *lcfg_inlbuf3; uint32_t lcfg_inllen4; char *lcfg_inlbuf4; + 
uint32_t lcfg_inllen5; + char *lcfg_inlbuf5; + uint32_t lcfg_inllen6; + char *lcfg_inlbuf6; - char lcfg_bulk[0]; + char lcfg_bulk[0]; }; #define LCFG_INIT(l, cmd, name) \ @@ -84,6 +88,8 @@ static inline int lustre_cfg_packlen(struct lustre_cfg *lcfg) len += size_round(lcfg->lcfg_inllen2); len += size_round(lcfg->lcfg_inllen3); len += size_round(lcfg->lcfg_inllen4); + len += size_round(lcfg->lcfg_inllen5); + len += size_round(lcfg->lcfg_inllen6); return size_round(len); } @@ -119,6 +125,10 @@ static inline int lustre_cfg_pack(struct lustre_cfg *data, char **pbuf, LOGL(data->lcfg_inlbuf3, data->lcfg_inllen3, ptr); if (data->lcfg_inlbuf4) LOGL(data->lcfg_inlbuf4, data->lcfg_inllen4, ptr); + if (data->lcfg_inlbuf5) + LOGL(data->lcfg_inlbuf5, data->lcfg_inllen5, ptr); + if (data->lcfg_inlbuf6) + LOGL(data->lcfg_inlbuf6, data->lcfg_inllen6, ptr); *plen = len; @@ -141,6 +151,8 @@ static inline int lustre_cfg_unpack(struct lustre_cfg *data, char *pbuf, overlay->lcfg_inlbuf2 = data->lcfg_inlbuf2; overlay->lcfg_inlbuf3 = data->lcfg_inlbuf3; overlay->lcfg_inlbuf4 = data->lcfg_inlbuf4; + overlay->lcfg_inlbuf5 = data->lcfg_inlbuf5; + overlay->lcfg_inlbuf6 = data->lcfg_inlbuf6; memcpy(data, pbuf, sizeof(*data)); @@ -155,6 +167,10 @@ static inline int lustre_cfg_unpack(struct lustre_cfg *data, char *pbuf, LOGU(data->lcfg_inlbuf3, data->lcfg_inllen3, ptr); if (data->lcfg_inlbuf4) LOGU(data->lcfg_inlbuf4, data->lcfg_inllen4, ptr); + if (data->lcfg_inlbuf5) + LOGU(data->lcfg_inlbuf5, data->lcfg_inllen5, ptr); + if (data->lcfg_inlbuf6) + LOGU(data->lcfg_inlbuf6, data->lcfg_inllen6, ptr); return 0; } @@ -226,8 +242,17 @@ static inline int lustre_cfg_getdata(char **buf, int len, void *arg, int kernel) if (lcfg->lcfg_inllen4) { lcfg->lcfg_inlbuf4 = &lcfg->lcfg_bulk[0] + offset; + offset += size_round(lcfg->lcfg_inllen4); } + if (lcfg->lcfg_inllen5) { + lcfg->lcfg_inlbuf5 = &lcfg->lcfg_bulk[0] + offset; + offset += size_round(lcfg->lcfg_inllen5); + } + + if (lcfg->lcfg_inllen6) + 
lcfg->lcfg_inlbuf6 = &lcfg->lcfg_bulk[0] + offset; + EXIT; return 0; } diff --git a/lustre/include/linux/lustre_cmobd.h b/lustre/include/linux/lustre_cmobd.h new file mode 100644 index 0000000..47b2e34 --- /dev/null +++ b/lustre/include/linux/lustre_cmobd.h @@ -0,0 +1,86 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Data structures for Cache Manager + * + */ + +#ifndef _LUSTRE_CMOBD_H +#define _LUSTRE_CMOBD_H + +#include + +#define LUSTRE_CMOBD_NAME "cmobd" + +#define CMOBD_MAX_THREADS 32UL + +#define CMOBD_NUM_THREADS max(min_t(unsigned long, num_physpages / 8192, \ + CMOBD_MAX_THREADS), 2UL) + +#define CMOBD_MAX_EXTENT_SZ (PTLRPC_MAX_BRW_PAGES * PAGE_SIZE) /* parenthesized so the product stays intact inside larger expressions */ + +#define CMOBD_MAX_EXTENTS 1024 + +/* for keeping the capacity of handle multi extents simultaneously */ +struct cmobd_extent_set { + struct ldlm_extent es_extent; + struct obdo es_oa; + + struct lov_stripe_md *es_lsm; + struct obd_export *es_exp; + + /* maximum length of per sub extent */ + unsigned long es_ext_sz; + /* sub extents count */ + obd_count es_count; + /* pages to be sent */ + struct list_head es_pages; + /* protect the es_pages and es_count */ + spinlock_t es_lock; + + wait_queue_head_t es_waitq; +}; + +struct cmobd_extent_info { + struct list_head ei_link; + struct cmobd_extent_set *ei_set; + struct ldlm_extent ei_extent; +}; + +struct cmobd_async_page { + struct list_head cmap_link; + struct page *cmap_page; + void *cmap_cookie; + struct cmobd_extent_set *cmap_es; +}; + +struct cmobd_write_service { + struct list_head ws_threads; + int ws_nthreads; + spinlock_t ws_thread_lock; + + struct list_head ws_extents; + int ws_nextents; + spinlock_t ws_extent_lock; + wait_queue_head_t ws_waitq_provider; /* extent provider queue */ + wait_queue_head_t ws_waitq_consumer; /* extent consumer queue */ +}; + +#endif /* _LUSTRE_CMOBD_H */ diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 4a4e3a0..120e996 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -52,7 +52,7 @@ #define LTIME_S(time) (time.tv_sec) #define ll_path_lookup path_lookup -#define ll_permission permission +#define ll_permission(inode,mask,nd) permission(inode,mask,nd) #define ll_pgcache_lock(mapping) spin_lock(&mapping->page_lock) #define
ll_pgcache_unlock(mapping) spin_unlock(&mapping->page_lock) @@ -111,7 +111,7 @@ static inline int cleanup_group_info(void) #else /* 2.4.. */ #define ll_vfs_create(a,b,c,d) vfs_create(a,b,c) -#define ll_permission(a,b,c) permission(a,b) +#define ll_permission(inode,mask,nd) permission(inode,mask) #define ILOOKUP(sb, ino, test, data) ilookup4(sb, ino, test, data); #define DCACHE_DISCONNECTED DCACHE_NFSD_DISCONNECTED #define ll_dev_t int @@ -144,15 +144,15 @@ static inline void clear_page_dirty(struct page *page) #define cpu_online(cpu) (cpu_online_map & (1<dirty_pages) && + list_empty(&mapping->clean_pages) && + list_empty(&mapping->locked_pages)) { + rc = 0; + } + ll_pgcache_unlock(mapping); + + return rc; +} + +static inline int clear_page_dirty_for_io(struct page *page) +{ + struct address_space *mapping = page->mapping; + + if (page->mapping && PageDirty(page)) { + ClearPageDirty(page); + ll_pgcache_lock(mapping); + list_del(&page->list); + list_add(&page->list, &mapping->locked_pages); + ll_pgcache_unlock(mapping); + return 1; + } + return 0; +} +#else +static inline int mapping_has_pages(struct address_space *mapping) +{ + return mapping->nrpages > 0; +} +#endif + #endif /* __KERNEL__ */ #endif /* _COMPAT25_H */ diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h index 48c26ac..687d400 100644 --- a/lustre/include/linux/lustre_dlm.h +++ b/lustre/include/linux/lustre_dlm.h @@ -90,6 +90,14 @@ typedef enum { * pretty high-risk, though, and would need a lot more testing. */ #define LDLM_FL_CAN_MATCH 0x100000 +/* A lock contributes to the kms calculation until it has finished the part + * of its cancelation that performs write back on its dirty pages. It + * can remain on the granted list during this whole time. Threads racing + * to update the kms after performing their writeback need to know to + * exclude each other's locks from the calculation as they walk the granted + * list. 
*/ +#define LDLM_FL_KMS_IGNORE 0x200000 + /* The blocking callback is overloaded to perform two functions. These flags * indicate which operation should be performed. */ #define LDLM_CB_BLOCKING 1 @@ -102,6 +110,7 @@ typedef enum { #define LCK_COMPAT_CW (LCK_COMPAT_PW | LCK_CW) #define LCK_COMPAT_CR (LCK_COMPAT_CW | LCK_PR | LCK_PW) #define LCK_COMPAT_NL (LCK_COMPAT_CR | LCK_EX) +#define LCK_COMPAT_GROUP (LCK_GROUP | LCK_NL) static ldlm_mode_t lck_compat_array[] = { [LCK_EX] LCK_COMPAT_EX, @@ -109,12 +118,13 @@ static ldlm_mode_t lck_compat_array[] = { [LCK_PR] LCK_COMPAT_PR, [LCK_CW] LCK_COMPAT_CW, [LCK_CR] LCK_COMPAT_CR, - [LCK_NL] LCK_COMPAT_NL + [LCK_NL] LCK_COMPAT_NL, + [LCK_GROUP] LCK_COMPAT_GROUP }; static inline void lockmode_verify(ldlm_mode_t mode) { - LASSERT(mode >= LCK_EX && mode <= LCK_NL); + LASSERT(mode >= LCK_EX && mode <= LCK_GROUP); } static inline int lockmode_compat(ldlm_mode_t exist, ldlm_mode_t new) @@ -449,11 +459,6 @@ int ldlm_handle_cancel(struct ptlrpc_request *req); int ldlm_del_waiting_lock(struct ldlm_lock *lock); int ldlm_get_ref(void); void ldlm_put_ref(int force); -#ifndef __KERNEL__ -void liblustre_ldlm_handle_bl_callback(struct ldlm_namespace *ns, - struct ldlm_lock_desc *ld, - struct ldlm_lock *lock); -#endif /* ldlm_lock.c */ ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res); diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index 8b9b235..7360b24 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -1,7 +1,7 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Copyright (C) 2001 Cluster File Systems, Inc. + * Copyright (C) 2001-2004 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. 
* @@ -28,6 +28,7 @@ #ifdef __KERNEL__ #include +#include #include typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, @@ -58,22 +59,65 @@ struct fsfilt_operations { int (* fs_set_md)(struct inode *inode, void *handle, void *md, int size); int (* fs_get_md)(struct inode *inode, void *md, int size); + + /* this method is needed to make IO operation fsfilt nature depend. */ +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) + int (* fs_send_bio)(struct inode *inode, struct bio *bio); +#else + int (* fs_send_bio)(struct inode *inode, struct kiobuf *bio); +#endif + + /* methods for getting page from backing fs and putting page there + * during IO. Used on OST. */ + int (* fs_putpage)(struct inode *inode, struct page *page); + struct page *(* fs_getpage)(struct inode *inode, long int index); + ssize_t (* fs_readpage)(struct file *file, char *buf, size_t count, loff_t *offset); - int (* fs_add_journal_cb)(struct obd_device *obd, __u64 last_rcvd, - void *handle, fsfilt_cb_t cb_func, + int (* fs_add_journal_cb)(struct obd_device *obd, struct super_block *sb, + __u64 last_rcvd, void *handle, fsfilt_cb_t cb_func, void *cb_data); int (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs); int (* fs_sync)(struct super_block *sb); - int (* fs_map_inode_page)(struct inode *inode, struct page *page, - unsigned long *blocks, int *created, - int create); + int (* fs_map_inode_pages)(struct inode *inode, struct page **page, + int pages, unsigned long *blocks, + int *created, int create, + struct semaphore *sem); int (* fs_prep_san_write)(struct inode *inode, long *blocks, int nblocks, loff_t newsize); int (* fs_write_record)(struct file *, void *, int size, loff_t *, int force_sync); int (* fs_read_record)(struct file *, void *, int size, loff_t *); int (* fs_setup)(struct obd_device *, struct super_block *); + + int (* fs_post_setup)(struct obd_device *obd, struct vfsmount *mnt); + int (* fs_post_cleanup)(struct obd_device *obd, struct vfsmount *mnt); + int 
(* fs_get_reint_log_ctxt)(struct super_block *sb, + struct llog_ctxt **ctxt); + int (* fs_set_kml_flags)(struct inode *inode); + int (* fs_set_ost_flags)(struct super_block *sb); + int (* fs_set_mds_flags)(struct super_block *sb); + + int (* fs_set_xattr)(struct inode *inode, void *handle, char *name, + void *buffer, int buffer_size); + int (* fs_get_xattr)(struct inode *inode, char *name, + void *buffer, int buffer_size); + + int (* fs_init_extents_ea)(struct inode *inode); + int (* fs_insert_extents_ea)(struct inode *inode, unsigned long from, + unsigned long num); + int (* fs_write_extents)(struct dentry *dentry, + unsigned long offset, unsigned long blks); + int (* fs_remove_extents_ea)(struct inode *inode, unsigned long from, + unsigned long num); + int (* fs_get_ino_write_extents)(struct super_block *sb, ino_t ino, + char **pbuf, int *size); + int (* fs_free_write_extents)(struct super_block *sb, ino_t ino, + char *pbuf, int size); + int (* fs_get_inode_write_extents)(struct inode *inode, char **pbuf, + int *size); + int (* fs_get_write_extents_num)(struct inode *inode, int* size); + int (* fs_get_op_len)(int, struct fsfilt_objinfo *, int); int (* fs_add_dir_entry)(struct obd_device *, struct dentry *, char *, int, unsigned long, unsigned long, @@ -96,14 +140,45 @@ extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops); #define FSFILT_OP_SETATTR 8 #define FSFILT_OP_LINK 9 #define FSFILT_OP_CANCEL_UNLINK 10 - -static inline void *fsfilt_start_log(struct obd_device *obd, - struct inode *inode, int op, - struct obd_trans_info *oti, int logs) +#define FSFILT_OP_NOOP 15 + +/* XXX BUG 3188 -- must return to one set of opcodes */ +#define KML_UNLINK 0x11 +#define KML_RMDIR 0x12 +#define KML_RENAME 0x13 +#define KML_CREATE 0x14 +#define KML_MKDIR 0x15 +#define KML_SYMLINK 0x16 +#define KML_MKNOD 0x17 +#define KML_LINK 0x19 + +#define CACHE_UNLINK 0x21 +#define CACHE_RMDIR 0x22 +#define CACHE_RENAME 0x23 +#define CACHE_CREATE 0x24 +#define CACHE_MKDIR 0x25 
+#define CACHE_SYMLINK 0x26 +#define CACHE_MKNOD 0x27 +#define CACHE_LINK 0x29 +#define CACHE_NOOP 0x2f + +#define KML_CACHE_UNLINK 0x31 +#define KML_CACHE_RMDIR 0x32 +#define KML_CACHE_RENAME 0x33 +#define KML_CACHE_CREATE 0x34 +#define KML_CACHE_MKDIR 0x35 +#define KML_CACHE_SYMLINK 0x36 +#define KML_CACHE_MKNOD 0x37 +#define KML_CACHE_LINK 0x39 +#define KML_CACHE_NOOP 0x3f + +static inline void * +fsfilt_start_ops(struct fsfilt_operations *ops, struct inode *inode, + int op, struct obd_trans_info *oti, int logs) { unsigned long now = jiffies; void *parent_handle = oti ? oti->oti_handle : NULL; - void *handle = obd->obd_fsops->fs_start(inode, op, parent_handle, logs); + void *handle = ops->fs_start(inode, op, parent_handle, logs); CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle); if (oti != NULL) { @@ -120,17 +195,60 @@ static inline void *fsfilt_start_log(struct obd_device *obd, return handle; } -static inline void *fsfilt_start(struct obd_device *obd, struct inode *inode, - int op, struct obd_trans_info *oti) +static inline void * +fsfilt_start_log(struct obd_device *obd, struct inode *inode, + int op, struct obd_trans_info *oti, int logs) { - return fsfilt_start_log(obd, inode, op, oti, 0); + return fsfilt_start_ops(obd->obd_fsops, inode, op, oti, logs); } -static inline void *fsfilt_brw_start_log(struct obd_device *obd, - int objcount, - struct fsfilt_objinfo *fso, - int niocount, struct niobuf_local *nb, - struct obd_trans_info *oti, int logs) +static inline void * +fsfilt_start(struct obd_device *obd, struct inode *inode, + int op, struct obd_trans_info *oti) +{ + return fsfilt_start_ops(obd->obd_fsops, inode, op, oti, 0); +} + +static inline void * +llog_fsfilt_start(struct llog_ctxt *ctxt, struct inode *inode, + int op, struct obd_trans_info *oti) +{ + return fsfilt_start_ops(ctxt->loc_fsops, inode, op, oti, 1); +} + +static inline int +fsfilt_commit_ops(struct fsfilt_operations *ops, struct inode *inode, + void *handle, int force_sync) +{ 
+ unsigned long now = jiffies; + int rc = ops->fs_commit(inode, handle, force_sync); + CDEBUG(D_HA, "committing handle %p\n", handle); + + if (time_after(jiffies, now + 15 * HZ)) + CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + + return rc; +} + +static inline int +fsfilt_commit(struct obd_device *obd, struct inode *inode, + void *handle, int force_sync) +{ + return fsfilt_commit_ops(obd->obd_fsops, inode, handle, force_sync); +} + +static inline int +llog_fsfilt_commit(struct llog_ctxt *ctxt, struct inode *inode, + void *handle, int force_sync) +{ + return fsfilt_commit_ops(ctxt->loc_fsops, inode, handle, force_sync); +} + +static inline void * +fsfilt_brw_start_log(struct obd_device *obd, int objcount, + struct fsfilt_objinfo *fso, int niocount, + struct niobuf_local *nb, struct obd_trans_info *oti, + int logs) { unsigned long now = jiffies; void *parent_handle = oti ? oti->oti_handle : NULL; @@ -153,30 +271,17 @@ static inline void *fsfilt_brw_start_log(struct obd_device *obd, return handle; } -static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount, - struct fsfilt_objinfo *fso, int niocount, - struct niobuf_local *nb, - struct obd_trans_info *oti) +static inline void * +fsfilt_brw_start(struct obd_device *obd, int objcount, + struct fsfilt_objinfo *fso, int niocount, + struct niobuf_local *nb, struct obd_trans_info *oti) { return fsfilt_brw_start_log(obd, objcount, fso, niocount, nb, oti, 0); } -static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode, - void *handle, int force_sync) -{ - unsigned long now = jiffies; - int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync); - CDEBUG(D_HA, "committing handle %p\n", handle); - - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); - - return rc; -} - -static inline int fsfilt_commit_async(struct obd_device *obd, - struct inode *inode, void *handle, - void **wait_handle) +static inline int 
+fsfilt_commit_async(struct obd_device *obd, struct inode *inode, + void *handle, void **wait_handle) { unsigned long now = jiffies; int rc = obd->obd_fsops->fs_commit_async(inode, handle, wait_handle); @@ -188,8 +293,8 @@ static inline int fsfilt_commit_async(struct obd_device *obd, return rc; } -static inline int fsfilt_commit_wait(struct obd_device *obd, - struct inode *inode, void *handle) +static inline int +fsfilt_commit_wait(struct obd_device *obd, struct inode *inode, void *handle) { unsigned long now = jiffies; int rc = obd->obd_fsops->fs_commit_wait(inode, handle); @@ -199,8 +304,9 @@ static inline int fsfilt_commit_wait(struct obd_device *obd, return rc; } -static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry, - void *handle, struct iattr *iattr,int do_trunc) +static inline int +fsfilt_setattr(struct obd_device *obd, struct dentry *dentry, + void *handle, struct iattr *iattr, int do_trunc) { unsigned long now = jiffies; int rc; @@ -210,43 +316,119 @@ static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry, return rc; } -static inline int fsfilt_iocontrol(struct obd_device *obd, struct inode *inode, - struct file *file, unsigned int cmd, - unsigned long arg) +static inline int +fsfilt_iocontrol(struct obd_device *obd, struct inode *inode, + struct file *file, unsigned int cmd, + unsigned long arg) { return obd->obd_fsops->fs_iocontrol(inode, file, cmd, arg); } -static inline int fsfilt_set_md(struct obd_device *obd, struct inode *inode, - void *handle, void *md, int size) +static inline int fsfilt_setup(struct obd_device *obd, + struct super_block *fs) +{ + if (obd->obd_fsops->fs_setup) + return obd->obd_fsops->fs_setup(obd, fs); + return 0; +} + +static inline int +fsfilt_set_md(struct obd_device *obd, struct inode *inode, + void *handle, void *md, int size) { return obd->obd_fsops->fs_set_md(inode, handle, md, size); } -static inline int fsfilt_get_md(struct obd_device *obd, struct inode *inode, - void 
*md, int size) +static inline int +fsfilt_get_md(struct obd_device *obd, struct inode *inode, + void *md, int size) { return obd->obd_fsops->fs_get_md(inode, md, size); } -static inline ssize_t fsfilt_readpage(struct obd_device *obd, - struct file *file, char *buf, - size_t count, loff_t *offset) +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +static inline int +fsfilt_send_bio(struct obd_device *obd, struct inode *inode, + struct bio *bio) +#else +static inline int +fsfilt_send_bio(struct obd_device *obd, struct inode *inode, + struct kiobuf *bio) +#endif +{ + return obd->obd_fsops->fs_send_bio(inode, bio); +} + +static inline int +fsfilt_putpage(struct obd_device *obd, struct inode *inode, + struct page *page) +{ + int rc = 0; + struct filter_obd *filter; + unsigned long now = jiffies; + + LASSERT(obd != NULL); + LASSERT(inode != NULL); + LASSERT(page != NULL); + + filter = &obd->u.filter; + + if (!obd->obd_fsops->fs_putpage) + return -ENOSYS; + + CDEBUG(D_INFO, "putpage %lx\n", page->index); + + rc = obd->obd_fsops->fs_putpage(inode, page); + + if (time_after(jiffies, now + 15 * HZ)) + CERROR("long putpage time %lus\n", (jiffies - now) / HZ); + + return rc; +} + +static inline struct page * +fsfilt_getpage(struct obd_device *obd, struct inode *inode, + unsigned long index) +{ + struct page *page; + unsigned long now = jiffies; + + LASSERT(obd != NULL); + LASSERT(inode != NULL); + + if (!obd->obd_fsops->fs_getpage) + return ERR_PTR(-ENOSYS); + + CDEBUG(D_INFO, "getpage %lx\n", index); + + page = obd->obd_fsops->fs_getpage(inode, index); + + if (time_after(jiffies, now + 15 * HZ)) + CERROR("long getpage time %lus\n", (jiffies - now) / HZ); + + return page; +} + +static inline ssize_t +fsfilt_readpage(struct obd_device *obd, struct file *file, char *buf, + size_t count, loff_t *offset) { return obd->obd_fsops->fs_readpage(file, buf, count, offset); } -static inline int fsfilt_add_journal_cb(struct obd_device *obd, __u64 last_rcvd, - void *handle, fsfilt_cb_t 
cb_func, - void *cb_data) +static inline int +fsfilt_add_journal_cb(struct obd_device *obd, struct super_block *sb, + __u64 last_rcvd, void *handle, fsfilt_cb_t cb_func, + void *cb_data) { - return obd->obd_fsops->fs_add_journal_cb(obd, last_rcvd, - handle, cb_func, cb_data); + return obd->obd_fsops->fs_add_journal_cb(obd, sb, last_rcvd, handle, + cb_func, cb_data); } /* very similar to obd_statfs(), but caller already holds obd_osfs_lock */ -static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *sb, - unsigned long max_age) +static inline int +fsfilt_statfs(struct obd_device *obd, struct super_block *sb, + unsigned long max_age) { int rc = 0; @@ -262,64 +444,157 @@ static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *sb, return rc; } -static inline int fsfilt_sync(struct obd_device *obd, struct super_block *sb) +static inline int +fsfilt_sync(struct obd_device *obd, struct super_block *sb) { return obd->obd_fsops->fs_sync(sb); } -static inline int fsfilt_map_inode_page(struct obd_device *obd, - struct inode *inode, struct page *page, - unsigned long *blocks, int *created, - int create) +static inline int fsfilt_map_inode_pages(struct obd_device *obd, + struct inode *inode, + struct page **page, int pages, + unsigned long *blocks, int *created, + int create, struct semaphore *sem) { - return obd->obd_fsops->fs_map_inode_page(inode, page, blocks, created, - create); + return obd->obd_fsops->fs_map_inode_pages(inode, page, pages, blocks, + created, create, sem); } -static inline int fs_prep_san_write(struct obd_device *obd, - struct inode *inode, - long *blocks, - int nblocks, - loff_t newsize) +static inline int +fsfilt_write_extents(struct obd_device *obd, struct dentry *dentry, + unsigned long offset, unsigned long blks) +{ + if (obd->obd_fsops->fs_write_extents) + return obd->obd_fsops->fs_write_extents(dentry, + offset, blks); + return 0; +} + +static inline int +fs_prep_san_write(struct obd_device *obd, struct inode 
*inode, + long *blocks, int nblocks, loff_t newsize) { return obd->obd_fsops->fs_prep_san_write(inode, blocks, nblocks, newsize); } -static inline int fsfilt_read_record(struct obd_device *obd, struct file *file, - void *buf, loff_t size, loff_t *offs) +static inline int +fsfilt_read_record(struct obd_device *obd, struct file *file, + void *buf, loff_t size, loff_t *offs) { return obd->obd_fsops->fs_read_record(file, buf, size, offs); } -static inline int fsfilt_write_record(struct obd_device *obd, struct file *file, - void *buf, loff_t size, loff_t *offs, - int force_sync) +static inline int +llog_fsfilt_read_record(struct llog_ctxt *ctxt, struct file *file, + void *buf, loff_t size, loff_t *offs) { - return obd->obd_fsops->fs_write_record(file, buf, size,offs,force_sync); + return ctxt->loc_fsops->fs_read_record(file, buf, size, offs); } -static inline int fsfilt_setup(struct obd_device *obd, struct super_block *fs) +static inline int +fsfilt_write_record(struct obd_device *obd, struct file *file, + void *buf, loff_t size, loff_t *offs, int force_sync) { - if (obd->obd_fsops->fs_setup) - return obd->obd_fsops->fs_setup(obd, fs); + return obd->obd_fsops->fs_write_record(file, buf, size, offs, + force_sync); +} + +static inline int +llog_fsfilt_write_record(struct llog_ctxt *ctxt, struct file *file, + void *buf, loff_t size, loff_t *offs, + int force_sync) +{ + return ctxt->loc_fsops->fs_write_record(file, buf, size, offs, + force_sync); +} + +static inline int +fsfilt_set_kml_flags(struct obd_device *obd, struct inode *inode) +{ + if (obd->obd_fsops->fs_set_kml_flags) + return obd->obd_fsops->fs_set_kml_flags(inode); + return 0; +} + +static inline int +fsfilt_post_setup(struct obd_device *obd) +{ + if (obd->obd_fsops->fs_post_setup) + return obd->obd_fsops->fs_post_setup(obd, + obd->obd_lvfs_ctxt.pwdmnt); + return 0; +} + +static inline int +fsfilt_post_cleanup(struct obd_device *obd) +{ + if (obd->obd_fsops->fs_post_cleanup) + return 
obd->obd_fsops->fs_post_cleanup(obd, + obd->obd_lvfs_ctxt.pwdmnt); + return 0; +} + +static inline int +fsfilt_get_ino_write_extents(struct obd_device *obd, + struct super_block *sb, + int ino, char **buf, int *size) +{ + if (obd->obd_fsops->fs_get_ino_write_extents) + return obd->obd_fsops->fs_get_ino_write_extents(sb, ino, + buf, size); + return 0; +} + +static inline int +fsfilt_free_write_extents(struct obd_device *obd, + struct super_block *sb, + int ino, char *buf, int size) +{ + if (obd->obd_fsops->fs_free_write_extents) + return obd->obd_fsops->fs_free_write_extents(sb, ino, + buf, size); + return 0; +} + +static inline int +fsfilt_get_reint_log_ctxt(struct obd_device *obd, + struct super_block *sb, + struct llog_ctxt **ctxt) +{ + if (obd->obd_fsops->fs_get_reint_log_ctxt) + return obd->obd_fsops->fs_get_reint_log_ctxt(sb, ctxt); + return 0; +} + +static inline int +fsfilt_set_ost_flags(struct obd_device *obd, struct super_block *sb) +{ + if (obd->obd_fsops->fs_set_ost_flags) + return obd->obd_fsops->fs_set_ost_flags(sb); + return 0; +} + +static inline int +fsfilt_set_mds_flags(struct obd_device *obd, struct super_block *sb) +{ + if (obd->obd_fsops->fs_set_mds_flags) + return obd->obd_fsops->fs_set_mds_flags(sb); return 0; } -static inline int fsfilt_add_dir_entry(struct obd_device *obd, - struct dentry *dir, - char *name, int namelen, - unsigned long ino, - unsigned long generation, - unsigned mds) +static inline int +fsfilt_add_dir_entry(struct obd_device *obd, struct dentry *dir, + char *name, int namelen, unsigned long ino, + unsigned long generation, unsigned mds) { LASSERT(obd->obd_fsops->fs_add_dir_entry); return obd->obd_fsops->fs_add_dir_entry(obd, dir, name, namelen, ino, generation, mds); } -static inline int fsfilt_del_dir_entry(struct obd_device *obd, - struct dentry *dentry) +static inline int +fsfilt_del_dir_entry(struct obd_device *obd, struct dentry *dentry) { LASSERT(obd->obd_fsops->fs_del_dir_entry); return 
obd->obd_fsops->fs_del_dir_entry(obd, dentry); diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index 16639d7eb..d7c9798 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -54,8 +54,7 @@ # include # include # include /* for strncpy, below */ -# include -# include /* to check for FMODE_EXEC, lest we redefine */ +# include /* to check for FMODE_EXEC, dev_t, lest we redefine */ #else #ifdef __CYGWIN__ # include @@ -68,7 +67,7 @@ #endif /* Defn's shared with user-space. */ -#include +#include /* * this file contains all data structures used in Lustre interfaces: @@ -81,21 +80,6 @@ /* * GENERAL STUFF */ -struct obd_uuid { - __u8 uuid[40]; -}; - -static inline int obd_uuid_equals(struct obd_uuid *u1, struct obd_uuid *u2) -{ - return strcmp(u1->uuid, u2->uuid) == 0; -} - -static inline void obd_str2uuid(struct obd_uuid *uuid, char *tmp) -{ - strncpy(uuid->uuid, tmp, sizeof(*uuid)); - uuid->uuid[sizeof(*uuid) - 1] = '\0'; -} - /* FOO_REQUEST_PORTAL is for incoming requests on the FOO * FOO_REPLY_PORTAL is for incoming replies on the FOO * FOO_BULK_PORTAL is for incoming bulk on the FOO @@ -316,7 +300,8 @@ extern void lustre_swab_obdo (struct obdo *o); #define LOV_PATTERN_RAID0 0x001 /* stripes are used round-robin */ #define LOV_PATTERN_RAID1 0x002 /* stripes are mirrors of each other */ #define LOV_PATTERN_FIRST 0x100 /* first stripe is not in round-robin */ - +#define LOV_PATTERN_CMOBD 0x200 + #define lov_ost_data lov_ost_data_v1 struct lov_ost_data_v1 { /* per-stripe data structure (little-endian)*/ __u64 l_object_id; /* OST object ID */ @@ -381,6 +366,8 @@ struct lov_mds_md_v0 { /* LOV EA mds/wire data (little-endian) */ #define OBD_MD_FLEPOCH (0x04000000) /* ->ost write easize is epoch */ #define OBD_MD_FLGRANT (0x08000000) /* ost preallocation space grant */ #define OBD_MD_MDS (0x10000000) /* where an inode lives on */ +#define OBD_MD_FLDIREA (0x20000000) /* dir's extended attribute data */ 
+#define OBD_MD_REINT (0x40000000) /* reintegrate oa*/ #define OBD_MD_FLNOTOBD (~(OBD_MD_FLBLOCKS | OBD_MD_LINKNAME|\ OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM|\ OBD_MD_FLQOS | OBD_MD_FLOSCOPQ | OBD_MD_FLCOOKIE|\ @@ -490,19 +477,22 @@ typedef enum { MDS_DONE_WRITING = 45, MDS_LAST_OPC } mds_cmd_t; + #define MDS_FIRST_OPC MDS_GETATTR /* * Do not exceed 63 */ -#define REINT_SETATTR 1 -#define REINT_CREATE 2 -#define REINT_LINK 3 -#define REINT_UNLINK 4 -#define REINT_RENAME 5 -#define REINT_OPEN 6 -#define REINT_MAX 6 +#define REINT_SETATTR 1 +#define REINT_CREATE 2 +#define REINT_LINK 3 +#define REINT_UNLINK 4 +#define REINT_RENAME 5 +#define REINT_OPEN 6 +#define REINT_CLOSE 7 +#define REINT_WRITE 8 +#define REINT_MAX 8 /* the disposition of the intent outlines what was executed */ #define DISP_IT_EXECD 1 @@ -533,11 +523,6 @@ struct mea { struct ll_fid mea_fids[0]; }; -struct ll_recreate_obj { - __u64 lrc_id; - __u32 lrc_ost_idx; -}; - extern void lustre_swab_ll_fid (struct ll_fid *fid); #define MDS_STATUS_CONN 1 @@ -581,12 +566,28 @@ struct mds_body { extern void lustre_swab_mds_body (struct mds_body *b); +struct lustre_md { + struct mds_body *body; + struct lov_stripe_md *lsm; + struct mea *mea; +}; -/* MDS update records */ +struct ll_uctxt { + __u32 gid1; + __u32 gid2; +}; -//struct mds_update_record_hdr { -// __u32 ur_opcode; -//}; +struct mdc_op_data { + struct ll_fid fid1; + struct ll_fid fid2; + struct ll_uctxt ctxt; + __u64 mod_time; + const char *name; + int namelen; + __u32 create_mode; + struct mea *mea1; /* mea of inode1 */ + struct mea *mea2; /* mea of inode2 */ +}; struct mds_rec_setattr { __u32 sa_opcode; @@ -745,7 +746,8 @@ typedef enum { LCK_PR = 4, LCK_CW = 8, LCK_CR = 16, - LCK_NL = 32 + LCK_NL = 32, + LCK_GROUP = 64 } ldlm_mode_t; struct ldlm_extent { @@ -913,6 +915,8 @@ typedef enum { LLOG_GEN_REC = 0x10640000, LLOG_HDR_MAGIC = 0x10645539, LLOG_LOGID_MAGIC = 0x1064553b, + SMFS_UPDATE_REC = 0x10650000, + CACHE_LRU_REC = 0x10660000, } 
llog_op_type; /* Log record header - stored in little endian order. @@ -974,13 +978,21 @@ struct llog_size_change_rec { struct llog_gen { __u64 mnt_cnt; __u64 conn_cnt; -} __attribute__((packed)); +}; struct llog_gen_rec { struct llog_rec_hdr lgr_hdr; struct llog_gen lgr_gen; struct llog_rec_tail lgr_tail; -}; +} __attribute__((packed)); + +struct llog_lru_rec { + struct llog_rec_hdr llr_hdr; + struct ll_fid llr_cfid; + struct ll_fid llr_pfid; + struct llog_rec_tail llr_tail; +} __attribute__((packed)); + /* On-disk header structure of each log object, stored in little endian order */ #define LLOG_CHUNK_SIZE 8192 #define LLOG_HEADER_SIZE (96) @@ -1001,7 +1013,7 @@ struct llog_log_hdr { __u32 llh_size; __u32 llh_flags; __u32 llh_cat_idx; - /* for a catlog the first plain slot is next to it */ + /* for a catalog the first plain slot is next to it */ struct obd_uuid llh_tgtuuid; __u32 llh_reserved[LLOG_HEADER_SIZE/sizeof(__u32) - 23]; __u32 llh_bitmap[LLOG_BITMAP_BYTES/sizeof(__u32)]; @@ -1025,6 +1037,7 @@ enum llogd_rpc_ops { LLOG_ORIGIN_HANDLE_CLOSE = 505, LLOG_ORIGIN_CONNECT = 506, LLOG_CATINFO = 507, /* for lfs catinfo */ + LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508, }; struct llogd_body { diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index 24ad8fb..968a310 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -25,8 +25,6 @@ #ifndef _LUSTRE_LIB_H #define _LUSTRE_LIB_H -#include - #ifndef __KERNEL__ # include # include @@ -42,6 +40,18 @@ #include #include +#ifndef LP_POISON +#if BITS_PER_LONG > 32 +# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) +#else +# define LI_POISON ((int)0x5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a) +#endif +#endif + #ifndef LPU64 /* x86_64 has 64bit longs and defines u64 as long long */ #if BITS_PER_LONG > 32 && 
!defined(__x86_64__) @@ -456,6 +466,11 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define ECHO_IOC_ENQUEUE _IOWR('f', 202, long) #define ECHO_IOC_CANCEL _IOWR('f', 203, long) +#define OBD_IOC_CMOBD_SYNC _IOWR('f', 210, long) + +#define OBD_IOC_COBD_CON _IOWR('f', 220, long) +#define OBD_IOC_COBD_COFF _IOWR('f', 221, long) +#define OBD_IOC_COBD_CFLUSH _IOWR('f', 222, long) /* XXX _IOWR('f', 250, long) has been defined in * portals/include/linux/kp30.h for debug, don't use it */ diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h index ea9a076..5077f8f 100644 --- a/lustre/include/linux/lustre_lite.h +++ b/lustre/include/linux/lustre_lite.h @@ -90,6 +90,11 @@ struct ll_inode_info { struct list_head lli_pending_write_llaps; struct list_head lli_close_item; + + struct file_operations *ll_save_ifop; + struct file_operations *ll_save_ffop; + struct file_operations *ll_save_wfop; + struct file_operations *ll_save_wrfop; #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) struct inode lli_vfs_inode; #endif @@ -140,10 +145,62 @@ enum { LPROC_LL_FILE_OPCODES }; +static inline void ll_inode2fid(struct ll_fid *fid, struct inode *inode) +{ + mdc_pack_fid(fid, inode->i_ino, inode->i_generation, + inode->i_mode & S_IFMT); + LASSERT(ll_i2info(inode)); + fid->mds = ll_i2info(inode)->lli_mds; +} + +static inline void ll_i2uctxt(struct ll_uctxt *ctxt, struct inode *i1, + struct inode *i2) +{ + LASSERT(i1); + LASSERT(ctxt); + + if (in_group_p(i1->i_gid)) + ctxt->gid1 = i1->i_gid; + else + ctxt->gid1 = -1; + + if (i2) { + if (in_group_p(i2->i_gid)) + ctxt->gid2 = i2->i_gid; + else + ctxt->gid2 = -1; + } else + ctxt->gid2 = 0; +} + +static inline void +ll_prepare_mdc_op_data(struct mdc_op_data *data, struct inode *i1, + struct inode *i2, const char *name, int namelen, + int mode) +{ + LASSERT(i1); + + ll_i2uctxt(&data->ctxt, i1, i2); + ll_inode2fid(&data->fid1, i1); + + /* it could be directory with mea */ + data->mea1 = 
ll_i2info(i1)->lli_mea; + + if (i2) { + ll_inode2fid(&data->fid2, i2); + data->mea2 = ll_i2info(i2)->lli_mea; + } + + data->name = name; + data->namelen = namelen; + data->create_mode = mode; + data->mod_time = LTIME_S(CURRENT_TIME); +} + #else #include #endif /* __KERNEL__ */ -#include +#include #endif diff --git a/lustre/include/linux/lustre_log.h b/lustre/include/linux/lustre_log.h index 56e2198..e6a5c14 100644 --- a/lustre/include/linux/lustre_log.h +++ b/lustre/include/linux/lustre_log.h @@ -40,6 +40,7 @@ #define LOG_NAME_LIMIT(logname, name) \ snprintf(logname, sizeof(logname), "LOGS/%s", name) +#define LLOG_EEMPTY 4711 struct obd_llogs; @@ -69,18 +70,49 @@ struct llog_handle { } u; }; -#define LLOG_EEMPTY 4711 +/* got from mds_update_record. + * FIXME: maybe some attribute in reint_record and update_record will be + * changed later. */ +/* XXX BUG 3188 -- must return to one set of structures. */ +/* XXX use fixed-sized fields (__u32) instead of dev_t and iattr->gid_t, etc */ + +struct update_record { + __u32 ur_opcode; + __u32 ur_fsuid; + __u32 ur_fsgid; + dev_t ur_rdev; + struct iattr ur_iattr; + struct iattr ur_pattr; + __u32 ur_flags; + __u32 ur_len; +}; + +struct reint_record { + struct update_record u_rec; + char *rec_data1; + int rec1_size; + char *rec_data2; + int rec2_size; +}; + +struct llog_smfs_rec { + struct llog_rec_hdr lsr_hdr; + struct update_record lsr_rec; + struct llog_rec_tail lsr_tail; +}; /* llog.c - general API */ typedef int (*llog_cb_t)(struct llog_handle *, struct llog_rec_hdr *, void *); +struct llog_handle *llog_alloc_handle(void); +void llog_free_handle(struct llog_handle *handle); +int llog_cancel_rec(struct llog_handle *loghandle, int index); int llog_init_handle(struct llog_handle *handle, int flags, struct obd_uuid *uuid); +int llog_close(struct llog_handle *cathandle); int llog_process(struct llog_handle *loghandle, llog_cb_t cb, void *data, void *catdata); -extern struct llog_handle *llog_alloc_handle(void); -extern void 
llog_free_handle(struct llog_handle *handle); -extern int llog_close(struct llog_handle *cathandle); -extern int llog_cancel_rec(struct llog_handle *loghandle, int index); +int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb, + void *data, void *catdata); /* llog_cat.c - catalog api */ struct llog_process_data { @@ -91,27 +123,37 @@ struct llog_process_data { struct llog_process_cat_data { int first_idx; int last_idx; - /* to process catlog across zero record */ + /* to process catalog across zero record */ }; +int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res, + struct llog_logid *logid); int llog_cat_put(struct llog_handle *cathandle); int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec, struct llog_cookie *reccookie, void *buf); int llog_cat_cancel_records(struct llog_handle *cathandle, int count, struct llog_cookie *cookies); int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data); +int llog_cat_reverse_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data); int llog_cat_set_first_idx(struct llog_handle *cathandle, int index); /* llog_obd.c */ -int llog_setup(struct obd_device *, struct obd_llogs *, int, struct obd_device *, - int, struct llog_logid *, struct llog_operations *); -int llog_cleanup(struct llog_ctxt *); -int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp); -int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, - struct lov_stripe_md *lsm, struct llog_cookie *logcookies, - int numcookies); -int llog_cancel(struct llog_ctxt *, struct lov_stripe_md *lsm, - int count, struct llog_cookie *cookies, int flags); +int llog_catalog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, + void *buf, struct llog_cookie *reccookie, int, void *data); +int llog_catalog_cancel(struct llog_ctxt *ctxt, int count, struct llog_cookie *, + int flags, void *data); +int llog_catalog_setup(struct llog_ctxt **res, char *name, struct obd_export *exp, + 
struct lvfs_run_ctxt *, struct fsfilt_operations *fsops, + struct dentry *logs_de, struct dentry *objects_de); +int llog_catalog_cleanup(struct llog_ctxt *ctxt); +int llog_cat_half_bottom(struct llog_cookie *, struct llog_handle *); + +/* llog_lvfs.c */ +int llog_get_cat_list(struct lvfs_run_ctxt *, struct fsfilt_operations *, + char *name, int count, struct llog_catid *idarray); +int llog_put_cat_list(struct lvfs_run_ctxt *, struct fsfilt_operations *, + char *name, int count, struct llog_catid *idarray); +extern struct llog_operations llog_lvfs_ops; int llog_obd_origin_setup(struct obd_device *, struct obd_llogs *, int, struct obd_device *, int, struct llog_logid *); @@ -120,15 +162,21 @@ int llog_obd_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, struct llog_cookie *logcookies, int numcookies); -int llog_cat_initialize(struct obd_device *, struct obd_llogs *, int); +int obd_llog_cat_initialize(struct obd_device *, struct obd_llogs *, int, char *); + +/* llog_obd.c - obd llog api */ +int obd_llog_setup(struct obd_device *obd, struct obd_llogs *, int index, + struct obd_device *disk_obd, int count, struct llog_logid *logid, + struct llog_operations *op); int obd_llog_init(struct obd_device *, struct obd_llogs *, struct obd_device *, int, struct llog_catid *); +int obd_llog_cleanup(struct llog_ctxt *); int obd_llog_finish(struct obd_device *, struct obd_llogs *, int); /* llog_ioctl.c */ int llog_ioctl(struct llog_ctxt *ctxt, int cmd, struct obd_ioctl_data *data); -int llog_catlog_list(struct obd_device *obd, int count, +int llog_catalog_list(struct obd_device *obd, int count, struct obd_ioctl_data *data); /* llog_net.c */ @@ -140,58 +188,64 @@ int llog_origin_connect(struct llog_ctxt *ctxt, int count, int llog_handle_connect(struct ptlrpc_request *req); /* recov_thread.c */ -int llog_obd_repl_cancel(struct llog_ctxt *ctxt, - struct lov_stripe_md *lsm, int count, - struct llog_cookie *cookies, int flags); +int 
llog_obd_repl_cancel(struct llog_ctxt *ctxt, int count, + struct llog_cookie *cookies, int flags, void *data); + int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp); int llog_repl_connect(struct llog_ctxt *ctxt, int count, struct llog_logid *logid, struct llog_gen *gen, struct obd_uuid *uuid); struct llog_operations { + int (*lop_setup)(struct obd_device *, struct obd_llogs *, int, + struct obd_device *, int, struct llog_logid *); + + int (*lop_cleanup)(struct llog_ctxt *ctxt); + int (*lop_create)(struct llog_ctxt *ctxt, struct llog_handle **, + struct llog_logid *logid, char *name); + int (*lop_destroy)(struct llog_handle *handle); + int (*lop_close)(struct llog_handle *handle); + + int (*lop_read_header)(struct llog_handle *handle); + int (*lop_add)(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, + void *buf, struct llog_cookie *logcookies, + int numcookies, void *data); + int (*lop_cancel)(struct llog_ctxt *ctxt, int count, + struct llog_cookie *cookies, int flags, void *data); int (*lop_write_rec)(struct llog_handle *loghandle, struct llog_rec_hdr *rec, struct llog_cookie *logcookies, int numcookies, void *, int idx); - int (*lop_destroy)(struct llog_handle *handle); int (*lop_next_block)(struct llog_handle *h, int *curr_idx, int next_idx, __u64 *offset, void *buf, int len); - int (*lop_create)(struct llog_ctxt *ctxt, struct llog_handle **, - struct llog_logid *logid, char *name); - int (*lop_close)(struct llog_handle *handle); - int (*lop_read_header)(struct llog_handle *handle); + int (*lop_prev_block)(struct llog_handle *h, + int prev_idx, void *buf, int len); - int (*lop_setup)(struct obd_device *, struct obd_llogs *, int, - struct obd_device *, int, struct llog_logid *); int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp); - int (*lop_cleanup)(struct llog_ctxt *ctxt); - int (*lop_add)(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, - struct lov_stripe_md *lsm, - struct llog_cookie *logcookies, int numcookies); - int 
(*lop_cancel)(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm, - int count, struct llog_cookie *cookies, int flags); int (*lop_connect)(struct llog_ctxt *ctxt, int count, struct llog_logid *logid, struct llog_gen *gen, struct obd_uuid *uuid); - /* XXX add 2 more: commit callbacks and llog recovery functions */ }; -/* llog_lvfs.c */ -extern struct llog_operations llog_lvfs_ops; -int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd, - char *name, int count, struct llog_catid *idarray); - struct llog_ctxt { - int loc_idx; /* my index the obd array of ctxt's */ + /* needed for lvfs based log */ + struct llog_handle *loc_handle; + struct llog_operations *loc_logops; + struct fsfilt_operations *loc_fsops; + struct dentry *loc_logs_dir; + struct dentry *loc_objects_dir; + struct lvfs_run_ctxt *loc_lvfs_ctxt; + + struct obd_device *loc_obd; /* points back to the containing obd */ struct llog_gen loc_gen; - struct obd_device *loc_obd; /* points back to the containing obd*/ + int loc_idx; /* my index the obd array of ctxt's */ + int loc_alone; /* is this llog ctxt has an obd? 
*/ + struct obd_export *loc_exp; - struct obd_import *loc_imp; /* to use in RPC's: can be backward - pointing import */ - struct llog_operations *loc_logops; - struct llog_handle *loc_handle; + struct obd_import *loc_imp; /* to use in RPC's: can be backward + pointing import */ struct llog_canceld_ctxt *loc_llcd; - struct semaphore loc_sem; /* protects loc_llcd */ + struct semaphore loc_sem; /* protects loc_llcd */ void *llog_proc_cb; struct obd_llogs *loc_llogs; }; @@ -219,11 +273,12 @@ static inline int llog_gen_lt(struct llog_gen a, struct llog_gen b) #define LLOG_GEN_INC(gen) ((gen).conn_cnt) ++ #define LLOG_PROC_BREAK 0x0001 +#define LLOG_DEL_RECORD 0x0002 -static inline int llog_obd2ops(struct llog_ctxt *ctxt, +static inline int llog_ctxt2ops(struct llog_ctxt *ctxt, struct llog_operations **lop) { - if (ctxt == NULL) + if (ctxt == NULL) return -ENOTCONN; *lop = ctxt->loc_logops; @@ -239,7 +294,7 @@ static inline int llog_handle2ops(struct llog_handle *loghandle, if (loghandle == NULL) return -EINVAL; - return llog_obd2ops(loghandle->lgh_ctxt, lop); + return llog_ctxt2ops(loghandle->lgh_ctxt, lop); } static inline int llog_data_len(int len) @@ -256,29 +311,36 @@ static inline struct llog_ctxt *llog_get_context(struct obd_llogs *llogs, return llogs->llog_ctxt[index]; } -static inline int llog_write_rec(struct llog_handle *handle, - struct llog_rec_hdr *rec, - struct llog_cookie *logcookies, - int numcookies, void *buf, int idx) +static inline int llog_create(struct llog_ctxt *ctxt, struct llog_handle **res, + struct llog_logid *logid, char *name) { struct llog_operations *lop; - int rc, buflen; + int rc; ENTRY; - rc = llog_handle2ops(handle, &lop); + rc = llog_ctxt2ops(ctxt, &lop); if (rc) RETURN(rc); - if (lop->lop_write_rec == NULL) + if (lop->lop_create == NULL) RETURN(-EOPNOTSUPP); - if (buf) - buflen = rec->lrh_len + sizeof(struct llog_rec_hdr) - + sizeof(struct llog_rec_tail); - else - buflen = rec->lrh_len; - LASSERT(size_round(buflen) == buflen); + rc = 
lop->lop_create(ctxt, res, logid, name); + RETURN(rc); +} - rc = lop->lop_write_rec(handle, rec, logcookies, numcookies, buf, idx); +static inline int llog_destroy(struct llog_handle *handle) +{ + struct llog_operations *lop; + int rc; + ENTRY; + + rc = llog_handle2ops(handle, &lop); + if (rc) + RETURN(rc); + if (lop->lop_destroy == NULL) + RETURN(-EOPNOTSUPP); + + rc = lop->lop_destroy(handle); RETURN(rc); } @@ -298,44 +360,69 @@ static inline int llog_read_header(struct llog_handle *handle) RETURN(rc); } -static inline int llog_destroy(struct llog_handle *handle) +static inline int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, + void *buf, struct llog_cookie *logcookies, + int numcookies, void *data) { struct llog_operations *lop; int rc; ENTRY; - rc = llog_handle2ops(handle, &lop); + rc = llog_ctxt2ops(ctxt, &lop); if (rc) RETURN(rc); - if (lop->lop_destroy == NULL) + if (lop->lop_add == NULL) RETURN(-EOPNOTSUPP); - rc = lop->lop_destroy(handle); + rc = lop->lop_add(ctxt, rec, buf, logcookies, numcookies, data); RETURN(rc); } -#if 0 -static inline int llog_cancel(struct obd_export *exp, - struct lov_stripe_md *lsm, int count, - struct llog_cookie *cookies, int flags) +static inline int llog_cancel(struct llog_ctxt *ctxt, int count, + struct llog_cookie *cookies, int flags, void *data) { struct llog_operations *lop; int rc; ENTRY; - rc = llog_handle2ops(loghandle, &lop); + rc = llog_ctxt2ops(ctxt, &lop); if (rc) RETURN(rc); if (lop->lop_cancel == NULL) RETURN(-EOPNOTSUPP); - rc = lop->lop_cancel(exp, lsm, count, cookies, flags); + rc = lop->lop_cancel(ctxt, count, cookies, flags, data); RETURN(rc); } -#endif -static inline int llog_next_block(struct llog_handle *loghandle, int *cur_idx, - int next_idx, __u64 *cur_offset, void *buf, +static inline int llog_write_rec(struct llog_handle *handle, + struct llog_rec_hdr *rec, + struct llog_cookie *logcookies, + int numcookies, void *buf, int idx) +{ + struct llog_operations *lop; + int rc, buflen; + ENTRY; 
+ + rc = llog_handle2ops(handle, &lop); + if (rc) + RETURN(rc); + if (lop->lop_write_rec == NULL) + RETURN(-EOPNOTSUPP); + + if (buf) + buflen = le32_to_cpu(rec->lrh_len) + sizeof(struct llog_rec_hdr) + + sizeof(struct llog_rec_tail); + else + buflen = le32_to_cpu(rec->lrh_len); + LASSERT(size_round(buflen) == buflen); + + rc = lop->lop_write_rec(handle, rec, logcookies, numcookies, buf, idx); + RETURN(rc); +} + +static inline int llog_next_block(struct llog_handle *loghandle, int *curr_idx, + int next_idx, __u64 *curr_offset, void *buf, int len) { struct llog_operations *lop; @@ -348,25 +435,25 @@ static inline int llog_next_block(struct llog_handle *loghandle, int *cur_idx, if (lop->lop_next_block == NULL) RETURN(-EOPNOTSUPP); - rc = lop->lop_next_block(loghandle, cur_idx, next_idx, cur_offset, buf, + rc = lop->lop_next_block(loghandle, curr_idx, next_idx, curr_offset, buf, len); RETURN(rc); } -static inline int llog_create(struct llog_ctxt *ctxt, struct llog_handle **res, - struct llog_logid *logid, char *name) +static inline int llog_prev_block(struct llog_handle *loghandle, + int prev_idx, void *buf, int len) { struct llog_operations *lop; int rc; ENTRY; - rc = llog_obd2ops(ctxt, &lop); + rc = llog_handle2ops(loghandle, &lop); if (rc) RETURN(rc); - if (lop->lop_create == NULL) + if (lop->lop_prev_block == NULL) RETURN(-EOPNOTSUPP); - rc = lop->lop_create(ctxt, res, logid, name); + rc = lop->lop_prev_block(loghandle, prev_idx, buf, len); RETURN(rc); } @@ -378,7 +465,7 @@ static inline int llog_connect(struct llog_ctxt *ctxt, int count, int rc; ENTRY; - rc = llog_obd2ops(ctxt, &lop); + rc = llog_ctxt2ops(ctxt, &lop); if (rc) RETURN(rc); if (lop->lop_connect == NULL) @@ -388,4 +475,20 @@ static inline int llog_connect(struct llog_ctxt *ctxt, int count, RETURN(rc); } +static inline int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp) +{ + struct llog_operations *lop; + int rc; + ENTRY; + + rc = llog_ctxt2ops(ctxt, &lop); + if (rc) + RETURN(rc); + if 
(lop->lop_sync == NULL) + RETURN(-EOPNOTSUPP); + + rc = lop->lop_sync(ctxt, exp); + RETURN(rc); +} + #endif diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h index c4f82dd..9799ca0 100644 --- a/lustre/include/linux/lustre_mds.h +++ b/lustre/include/linux/lustre_mds.h @@ -51,29 +51,6 @@ struct ll_file_data; #define LUSTRE_MDT_NAME "mdt" #define LUSTRE_MDC_NAME "mdc" -struct lustre_md { - struct mds_body *body; - struct lov_stripe_md *lsm; - struct mea *mea; -}; - -struct ll_uctxt { - __u32 gid1; - __u32 gid2; -}; - -struct mdc_op_data { - struct ll_fid fid1; - struct ll_fid fid2; - struct ll_uctxt ctxt; - __u64 mod_time; - const char *name; - int namelen; - __u32 create_mode; - struct mea *mea1; /* mea of inode1 */ - struct mea *mea2; /* mea of inode2 */ -}; - struct mds_update_record { __u32 ur_opcode; struct ll_fid *ur_fid1; @@ -87,18 +64,18 @@ struct mds_update_record { int ur_cookielen; struct llog_cookie *ur_logcookies; struct iattr ur_iattr; - struct obd_ucred ur_uc; + struct lvfs_ucred ur_uc; __u64 ur_rdev; __u32 ur_mode; __u64 ur_time; __u32 ur_flags; }; -#define ur_fsuid ur_uc.ouc_fsuid -#define ur_fsgid ur_uc.ouc_fsgid -#define ur_cap ur_uc.ouc_cap -#define ur_suppgid1 ur_uc.ouc_suppgid1 -#define ur_suppgid2 ur_uc.ouc_suppgid2 +#define _ur_fsuid ur_uc.luc_fsuid +#define _ur_fsgid ur_uc.luc_fsgid +#define _ur_cap ur_uc.luc_cap +#define _ur_suppgid1 ur_uc.luc_suppgid1 +#define _ur_suppgid2 ur_uc.luc_suppgid2 /* i_attr_flags holds the open count in the inode in 2.4 */ //XXX Alex implement on 2.4 with i_attr_flags and find soln for 2.5 please @@ -133,6 +110,13 @@ struct mds_update_record { #define MDS_INCOMPAT_SUPP (0) +#define REAL_MDS_NUMBER 1 +#define CACHE_MDS_NUMBER 0 + +/*flags for indicate the record are come from cmobd reint or + mdc create */ +#define REC_REINT_CREATE 0x0001 + /* Data stored per server at the head of the last_rcvd file. In le32 order. 
* Try to keep this the same as fsd_server_data so we might one day merge. */ struct mds_server_data { @@ -198,7 +182,7 @@ int mds_fs_cleanup(struct obd_device *obddev, int failover); int it_disposition(struct lookup_intent *it, int flag); void it_set_disposition(struct lookup_intent *it, int flag); int it_open_error(int phase, struct lookup_intent *it); -void mdc_set_lock_data(__u64 *lockh, void *data); +int mdc_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data); int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid, ldlm_iterator_t it, void *data); int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *, @@ -221,9 +205,8 @@ int mdc_enqueue(struct obd_export *exp, void *cb_data); /* mdc/mdc_request.c */ -int mdc_req2lustre_md(struct ptlrpc_request *req, int offset, - struct obd_export *exp_osc, - struct obd_export *exp_mdc, +int mdc_req2lustre_md(struct obd_export *exp_mdc, struct ptlrpc_request *req, + unsigned int offset, struct obd_export *exp_osc, struct lustre_md *md); int mdc_getstatus(struct obd_export *exp, struct ll_fid *rootfid); int mdc_getattr(struct obd_export *exp, struct ll_fid *fid, @@ -239,9 +222,11 @@ int mdc_open(struct obd_export *exp, obd_id ino, int type, int flags, struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh, struct ptlrpc_request **); struct obd_client_handle; -void mdc_set_open_replay_data(struct obd_client_handle *och, - struct ptlrpc_request *open_req); -void mdc_clear_open_replay_data(struct obd_client_handle *och); +int mdc_set_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och, + struct ptlrpc_request *open_req); +int mdc_clear_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och); int mdc_close(struct obd_export *, struct obdo *, struct obd_client_handle *, struct ptlrpc_request **); int mdc_readpage(struct obd_export *exp, struct ll_fid *mdc_fid, @@ -261,11 +246,12 @@ int mdc_sync(struct obd_export *exp, struct ll_fid *fid, int 
mdc_create_client(struct obd_uuid uuid, struct ptlrpc_client *cl); /* Store the generation of a newly-created inode in |req| for replay. */ -void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff, - int repoff); +int mdc_store_inode_generation(struct obd_export *exp, struct ptlrpc_request *req, + int reqoff, int repoff); int mdc_llog_process(struct obd_export *, char *logname, llog_cb_t, void *data); int mdc_done_writing(struct obd_export *exp, struct obdo *); - +int mdc_reint(struct ptlrpc_request *request, struct mdc_rpc_lock *rpc_lock, + int level); static inline void mdc_pack_fid(struct ll_fid *fid, obd_id ino, __u32 gen, int type) { diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index d7d1bea..9b89859 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -35,7 +35,6 @@ #include // #include #include -#include /* FIXME (for PTL_MD_MAX_IOV) */ #include #include #include @@ -45,26 +44,43 @@ #define PTLRPC_MD_OPTIONS (PTL_MD_EVENT_START_DISABLE | \ PTL_MD_LUSTRE_COMPLETION_SEMANTICS) -/* Define some large-ish defaults for MTU and MAX_IOV if portals ones - * aren't defined (i.e. no limits) or too large */ -#if (defined(PTL_MTU) && (PTL_MTU <= (1 << 20))) -# define PTLRPC_MTU PTL_MTU +/* Define some large-ish maxima for bulk I/O + * CAVEAT EMPTOR, with multinet (i.e. gateways forwarding between networks) + * these limits are system wide and not interface-local. */ +#define PTLRPC_MAX_BRW_SIZE (1 << 20) +#define PTLRPC_MAX_BRW_PAGES 512 + +/* ...reduce to fit... 
*/ + +#if CRAY_PORTALS +/* include a cray header here if relevant + * NB liblustre SIZE/PAGES is affected too, but it merges contiguous + * chunks, so FTTB, it always used contiguous MDs */ #else -# define PTLRPC_MTU (1 << 20) +# include #endif -#if (defined(PTL_MAX_IOV) && (PTL_MAX_IOV <= 512)) -# define PTLRPC_MAX_IOV PTL_MAX_IOV -#else -# define PTLRPC_MAX_IOV 512 + +#if (defined(PTL_MTU) && (PTL_MTU < PTLRPC_MAX_BRW_SIZE)) +# undef PTLRPC_MAX_BRW_SIZE +# define PTLRPC_MAX_BRW_SIZE PTL_MTU #endif +#if (defined(PTL_MD_MAX_IOV) && (PTL_MD_MAX_IOV < PTLRPC_MAX_BRW_PAGES )) +# undef PTLRPC_MAX_BRW_PAGES +# define PTLRPC_MAX_BRW_PAGES PTL_MD_MAX_IOV +#endif + +/* ...and make consistent... */ -/* Define consistent max bulk size/pages */ -#if (PTLRPC_MTU > PTLRPC_MAX_IOV * PAGE_SIZE) -# define PTLRPC_MAX_BRW_PAGES PTLRPC_MAX_IOV -# define PTLRPC_MAX_BRW_SIZE (PTLRPC_MAX_IOV * PAGE_SIZE) +#if (PTLRPC_MAX_BRW_SIZE > PTLRPC_MAX_BRW_PAGES * PAGE_SIZE) +# undef PTLRPC_MAX_BRW_SIZE +# define PTLRPC_MAX_BRW_SIZE (PTLRPC_MAX_BRW_PAGES * PAGE_SIZE) #else -# define PTLRPC_MAX_BRW_PAGES (PTLRPC_MTU / PAGE_SIZE) -# define PTLRPC_MAX_BRW_SIZE PTLRPC_MTU +# undef PTLRPC_MAX_BRW_PAGES +# define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE / PAGE_SIZE) +#endif + +#if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0) +#error "PTLRPC_MAX_BRW_PAGES isn't a power of two" #endif /* Size over which to OBD_VMALLOC() rather than OBD_ALLOC() service request @@ -87,7 +103,7 @@ */ #define LDLM_NUM_THREADS min(smp_num_cpus * smp_num_cpus * 8, 64) -#define LDLM_NBUF_MAX 256UL +#define LDLM_NBUF_MAX 512UL #define LDLM_BUFSIZE (8 * 1024) #define LDLM_MAXREQSIZE (5 * 1024) #define LDLM_MAXMEM (num_physpages*(PAGE_SIZE/1024)) @@ -96,7 +112,7 @@ #define MDT_MAX_THREADS 32UL #define MDT_NUM_THREADS max(min_t(unsigned long, num_physpages / 8192, \ MDT_MAX_THREADS), 2UL) -#define MDS_NBUF_MAX 512UL +#define MDS_NBUF_MAX 4096UL #define MDS_BUFSIZE (8 * 1024) /* Assume file name length = FNAME_MAX = 
256 (true for extN). * path name length = PATH_MAX = 4096 @@ -343,8 +359,6 @@ struct ptlrpc_request { /* Spare the preprocessor, spoil the bugs. */ #define FLAG(field, str) (field ? str : "") -#define PTLRPC_REQUEST_COMPLETE(req) ((req)->rq_phase > RQ_PHASE_RPC) - #define DEBUG_REQ_FLAGS(req) \ ((req->rq_phase == RQ_PHASE_NEW) ? "New" : \ (req->rq_phase == RQ_PHASE_RPC) ? "Rpc" : \ @@ -416,7 +430,7 @@ struct ptlrpc_bulk_desc { #if (!CRAY_PORTALS && defined(__KERNEL__)) ptl_kiov_t bd_iov[0]; #else - struct iovec bd_iov[0]; + ptl_md_iovec_t bd_iov[0]; #endif }; @@ -496,6 +510,12 @@ struct ptlrpc_service { struct ptlrpc_srv_ni srv_interfaces[0]; }; +static inline char *ptlrpc_peernid2str(struct ptlrpc_peer *p, char *str) +{ + LASSERT(p->peer_ni != NULL); + return (portals_nid2str(p->peer_ni->pni_number, p->peer_nid, str)); +} + /* ptlrpc/events.c */ extern struct ptlrpc_ni ptlrpc_interfaces[]; extern int ptlrpc_ninterfaces; @@ -506,6 +526,7 @@ extern void client_bulk_callback (ptl_event_t *ev); extern void request_in_callback(ptl_event_t *ev); extern void reply_out_callback(ptl_event_t *ev); extern void server_bulk_callback (ptl_event_t *ev); +extern int ptlrpc_default_nal(void); /* ptlrpc/connection.c */ void ptlrpc_dump_connections(void); @@ -662,6 +683,23 @@ void *lustre_swab_reqbuf (struct ptlrpc_request *req, int n, int minlen, void *lustre_swab_repbuf (struct ptlrpc_request *req, int n, int minlen, void *swabber); +void lustre_init_msg (struct lustre_msg *msg, int count, + int *lens, char **bufs); +void *mdc_setattr_pack(struct lustre_msg *msg, + struct mdc_op_data *data, + struct iattr *iattr, void *ea, int ealen, + void *ea2, int ea2len); +void *mdc_create_pack(struct lustre_msg *msg, int offset, + struct mdc_op_data *op_data, __u32 mode, __u64 rdev, + const void *data, int datalen); +void *mdc_unlink_pack(struct lustre_msg *msg, int offset, + struct mdc_op_data *data); +void *mdc_link_pack(struct lustre_msg *msg, int offset, + struct mdc_op_data *data); +void 
*mdc_rename_pack(struct lustre_msg *msg, int offset, + struct mdc_op_data *data, + const char *old, int oldlen, const char *new, int newlen); + /* ldlm/ldlm_lib.c */ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf); int client_obd_cleanup(struct obd_device * obddev, int flags); @@ -690,6 +728,7 @@ void ptlrpc_lprocfs_unregister_obd(struct obd_device *obddev); /* ptlrpc/llog_server.c */ int llog_origin_handle_create(struct ptlrpc_request *req); +int llog_origin_handle_prev_block(struct ptlrpc_request *req); int llog_origin_handle_next_block(struct ptlrpc_request *req); int llog_origin_handle_read_header(struct ptlrpc_request *req); int llog_origin_handle_close(struct ptlrpc_request *req); diff --git a/lustre/include/linux/lustre_smfs.h b/lustre/include/linux/lustre_smfs.h new file mode 100644 index 0000000..444622e --- /dev/null +++ b/lustre/include/linux/lustre_smfs.h @@ -0,0 +1,399 @@ +#ifndef __LUSTRE_SMFS_H +#define __LUSTRE_SMFS_H + +struct smfs_inode_info { + struct inode *smi_inode; + __u32 smi_flags; +}; + +struct journal_operations { + void *(*tr_start)(struct inode *, int op); + void (*tr_commit)(void *handle); +}; + +struct sm_operations { + /* operations on the file store */ + struct super_operations sm_sb_ops; + struct inode_operations sm_dir_iops; + struct inode_operations sm_file_iops; + struct inode_operations sm_sym_iops; + struct file_operations sm_dir_fops; + struct file_operations sm_file_fops; + struct file_operations sm_sym_fops; + struct dentry_operations sm_dentry_ops; + struct journal_operations sm_journal_ops; +}; + +/*smfs rec*/ +typedef int (*smfs_pack_rec_func)(char *buffer, struct dentry *dentry, + struct inode *dir, void *data1, + void *data2, int op); +typedef enum { + PACK_NORMAL = 0, + PACK_OST = 1, + PACK_MDS = 2, + PACK_MAX = 3, +} pack_func_t; + +struct mds_kml_pack_info { + int mpi_bufcount; + int mpi_size[4]; + int mpi_total_size; +}; +struct smfs_super_info { + struct super_block *smsi_sb; + struct 
vfsmount *smsi_mnt; /* mount the cache kern */ + struct fsfilt_operations *sm_cache_fsfilt; /* fsfilt operations */ + struct fsfilt_operations *sm_fsfilt; /* fsfilt operations */ + struct sm_operations *sm_ops; /* cache ops */ + struct lvfs_run_ctxt *smsi_ctxt; + struct llog_ctxt *smsi_rec_log; /* smfs kml llog */ + struct dentry *smsi_logs_dir; + struct dentry *smsi_objects_dir; + struct dentry *smsi_delete_dir; /* for delete inode dir */ + char *smsi_cache_ftype; /* cache file system type */ + char *smsi_ftype; /* file system type */ + struct obd_export *smsi_exp; /* file system obd exp */ + smfs_pack_rec_func smsi_pack_rec[PACK_MAX]; /* sm_pack_rec type ops */ + __u32 smsi_flags; /* flags */ + __u32 smsi_ops_check; +}; + +#define SMFS_FILE_TYPE "smfs" +#define SMFS_FILE_MAGIC 0x19760218 + +struct smfs_file_info { + struct file *c_file; + int magic; +}; + +struct smfs_proc_args { + struct super_block *sr_sb; + int sr_count; + int sr_flags; + void *sr_data; +}; +struct fs_extent{ + __u32 e_block; /* first logical block extent covers */ + __u32 e_start; /* first physical block extents lives */ + __u32 e_num; /* number of blocks covered by extent */ +}; + +#define I2SMI(inode) ((struct smfs_inode_info *) (&(inode->u.generic_ip))) +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define S2SMI(sb) ((struct smfs_super_info *) (&(sb->u.generic_sbp))) +#define S2CSB(sb) (((struct smfs_super_info *) (&(sb->u.generic_sbp)))->smsi_sb) +#else +#define S2SMI(sb) ((struct smfs_super_info *) (sb->s_fs_info)) +#define S2CSB(sb) (((struct smfs_super_info *) (sb->s_fs_info))->smsi_sb) +#endif + +#define I2CI(inode) (((struct smfs_inode_info*) (&(inode->u.generic_ip)))->smi_inode) + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define I2CSB(inode) ((struct smfs_super_info *) (&(inode->i_sb->u.generic_sbp))) +#else +#define I2CSB(inode) ((struct smfs_super_info *) (inode->i_sb->s_fs_info)) +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define I2FOPS(inode) 
(((struct smfs_super_info *) \ + (&(inode->i_sb->u.generic_sbp)))->sm_cache_fsfilt) +#else +#define I2FOPS(inode) (((struct smfs_super_info *) \ + (inode->i_sb->s_fs_info))->sm_cache_fsfilt) +#endif + +#define F2SMFI(file) ((struct smfs_file_info *)((file->private_data))) +#define F2CF(file) (((struct smfs_file_info *) ((file->private_data)))->c_file) +#define SIZE2BLKS(size, inode) ((size + (I2CI(inode)->i_blksize)) >> (I2CI(inode)->i_blkbits)) +#define OFF2BLKS(off, inode) (off >> (I2CI(inode)->i_blkbits)) + +#define SM_DO_REC 0x1 +#define SM_INIT_REC 0x2 +#define SM_CACHE_HOOK 0x4 +#define SM_OVER_WRITE 0x8 +#define SM_DIRTY_WRITE 0x10 + +#define SMFS_DO_REC(smfs_info) (smfs_info->smsi_flags & SM_DO_REC) +#define SMFS_SET_REC(smfs_info) (smfs_info->smsi_flags |= SM_DO_REC) +#define SMFS_CLEAN_REC(smfs_info) (smfs_info->smsi_flags &= ~SM_DO_REC) + +#define SMFS_INIT_REC(smfs_info) (smfs_info->smsi_flags & SM_INIT_REC) +#define SMFS_SET_INIT_REC(smfs_info) (smfs_info->smsi_flags |= SM_INIT_REC) +#define SMFS_CLEAN_INIT_REC(smfs_info) (smfs_info->smsi_flags &= ~SM_INIT_REC) + +#define SMFS_SET_INODE_REC(inode) (I2SMI(inode)->smi_flags |= SM_DO_REC) +#define SMFS_DO_INODE_REC(inode) (I2SMI(inode)->smi_flags & SM_DO_REC) +#define SMFS_CLEAN_INODE_REC(inode) (I2SMI(inode)->smi_flags &= ~SM_DO_REC) + +#define SMFS_CACHE_HOOK(smfs_info) (smfs_info->smsi_flags & SM_CACHE_HOOK) +#define SMFS_SET_CACHE_HOOK(smfs_info) (smfs_info->smsi_flags |= SM_CACHE_HOOK) +#define SMFS_CLEAN_CACHE_HOOK(smfs_info) (smfs_info->smsi_flags &= ~SM_CACHE_HOOK) + +#define SMFS_INODE_CACHE_HOOK(inode) (I2SMI(inode)->smi_flags & SM_CACHE_HOOK) +#define SMFS_SET_INODE_CACHE_HOOK(inode) (I2SMI(inode)->smi_flags |= SM_CACHE_HOOK) +#define SMFS_CLEAN_INODE_CACHE_HOOK(inode) (I2SMI(inode)->smi_flags &= ~SM_CACHE_HOOK) + +#define SMFS_INODE_OVER_WRITE(inode) (I2SMI(inode)->smi_flags & SM_OVER_WRITE) +#define SMFS_SET_INODE_OVER_WRITE(inode) (I2SMI(inode)->smi_flags |= SM_OVER_WRITE) +#define 
SMFS_CLEAN_INODE_OVER_WRITE(inode) (I2SMI(inode)->smi_flags &= ~SM_OVER_WRITE) + +#define SMFS_INODE_DIRTY_WRITE(inode) (I2SMI(inode)->smi_flags & SM_DIRTY_WRITE) +#define SMFS_SET_INODE_DIRTY_WRITE(inode) (I2SMI(inode)->smi_flags |= SM_DIRTY_WRITE) +#define SMFS_CLEAN_INODE_DIRTY_WRITE(inode) (I2SMI(inode)->smi_flags &= ~SM_DIRTY_WRITE) + + +#define LVFS_SMFS_BACK_ATTR "lvfs_back_attr" + + +#define REC_COUNT_BIT 0 +#define REC_COUNT_MASK 0x01 /*0001*/ +#define REC_OP_BIT 1 +#define REC_OP_MASK 0x06 /*0110*/ +#define REC_WRITE_KML_BIT 3 +#define REC_WRITE_KML_MASK 0x08 /*1000*/ +#define REC_DEC_LINK_BIT 4 +#define REC_DEC_LINK_MASK 0x10 /*10000* different with unlink*/ +#define REC_GET_OID_BIT 5 +#define REC_GET_OID_MASK 0x20 /*100000*/ + +#define REC_PACK_TYPE_BIT 6 +#define REC_PACK_TYPE_MASK 0x1C0 /*111000000*/ + +#define SET_REC_COUNT_FLAGS(flag, count_flag) \ + (flag |= count_flag << REC_COUNT_BIT) +#define GET_REC_COUNT_FLAGS(flag) \ + ((flag & REC_COUNT_MASK) >> REC_COUNT_BIT) + +#define SET_REC_OP_FLAGS(flag, op_flag) \ + (flag |= op_flag << REC_OP_BIT) +#define GET_REC_OP_FLAGS(flag) \ + ((flag & REC_OP_MASK) >> REC_OP_BIT) + +#define SET_REC_WRITE_KML_FLAGS(flag, op_flag) \ + (flag |= op_flag << REC_OP_BIT) +#define GET_REC_WRITE_KML_FLAGS(flag) \ + ((flag & REC_WRITE_KML_MASK) >> REC_WRITE_KML_BIT) + +#define SET_REC_DEC_LINK_FLAGS(flag, op_flag) \ + (flag |= op_flag << REC_DEC_LINK_BIT) +#define GET_REC_DEC_LINK_FLAGS(flag) \ + ((flag & REC_DEC_LINK_MASK) >> REC_DEC_LINK_BIT) + +#define SET_REC_GET_ID_FLAGS(flag, op_flag) \ + (flag |= op_flag << REC_GET_OID_BIT) +#define GET_REC_GET_OID_FLAGS(flag) \ + ((flag & REC_GET_OID_MASK) >> REC_GET_OID_BIT) + +#define SET_REC_PACK_TYPE_INDEX(flag, op_flag) \ + (flag |= op_flag << REC_PACK_TYPE_BIT) +#define GET_REC_PACK_TYPE_INDEX(flag) \ + ((flag & REC_PACK_TYPE_MASK) >> REC_PACK_TYPE_BIT) + +#define SMFS_REC_ALL 0x1 +#define SMFS_REC_BY_COUNT 0x0 + +#define SMFS_REINT_REC 0x1 +#define SMFS_UNDO_REC 0x2 + 
+#define SMFS_WRITE_KML 0x1 +#define SMFS_DEC_LINK 0x1 +#define SMFS_GET_OID 0x1 + +#define SMFS_DO_REINT_REC(flag) \ + (GET_REC_OP_FLAGS(flag) == SMFS_REINT_REC) +#define SMFS_DO_UNDO_REC(flag) \ + (GET_REC_OP_FLAGS(flag) == SMFS_UNDO_REC) +#define SMFS_DO_REC_ALL(flag) \ + (GET_REC_COUNT_FLAGS(flag) == SMFS_REC_ALL) +#define SMFS_DO_REC_BY_COUNT(flag) \ + (GET_REC_COUNT_FLAGS(flag) == SMFS_REC_BY_COUNT) +#define SMFS_DO_WRITE_KML(flag) \ + (GET_REC_WRITE_KML_FLAGS(flag) == SMFS_WRITE_KML) +#define SMFS_DO_DEC_LINK(flag) \ + (GET_REC_DEC_LINK_FLAGS(flag) == SMFS_DEC_LINK) + +#define SMFS_DO_GET_OID(flag) \ + (GET_REC_GET_OID_FLAGS(flag) == SMFS_GET_OID) + +/*DIRTY flags of write ops*/ +#define REINT_EXTENTS_FLAGS "replay_flags" +#define SMFS_DIRTY_WRITE 0x01 +#define SMFS_OVER_WRITE 0x02 + + +static inline void duplicate_inode(struct inode *dst_inode, + struct inode *src_inode) +{ + dst_inode->i_mode = src_inode->i_mode; + dst_inode->i_uid = src_inode->i_uid; + dst_inode->i_gid = src_inode->i_gid; + dst_inode->i_nlink = src_inode->i_nlink; + dst_inode->i_size = src_inode->i_size; + dst_inode->i_atime = src_inode->i_atime; + dst_inode->i_ctime = src_inode->i_ctime; + dst_inode->i_mtime = src_inode->i_mtime; + dst_inode->i_blksize = src_inode->i_blksize; + dst_inode->i_version = src_inode->i_version; + dst_inode->i_state = src_inode->i_state; + dst_inode->i_generation = src_inode->i_generation; + + /* This is to make creating special files working. 
*/ + dst_inode->i_rdev = src_inode->i_rdev; +} + +static inline void post_smfs_inode(struct inode *inode, + struct inode *cache_inode) +{ + if (inode && cache_inode) { + duplicate_inode(inode, cache_inode); + /*Here we must release the cache_inode, + *Otherwise we will have no chance to + *do it + */ + cache_inode->i_state &=~I_LOCK; + inode->i_blocks = cache_inode->i_blocks; + } +} + +static inline void pre_smfs_inode(struct inode *inode, + struct inode *cache_inode) +{ + if (inode && cache_inode) + duplicate_inode(cache_inode, inode); +} + +/* instantiate a file handle to the cache file */ +static inline void duplicate_file(struct file *dst_file, struct file *src_file) +{ + dst_file->f_pos = src_file->f_pos; + dst_file->f_mode = src_file->f_mode; + dst_file->f_flags = src_file->f_flags; + dst_file->f_owner = src_file->f_owner; + dst_file->f_vfsmnt = src_file->f_vfsmnt; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + dst_file->f_reada = src_file->f_reada; + dst_file->f_ramax = src_file->f_ramax; + dst_file->f_raend = src_file->f_raend; + dst_file->f_ralen = src_file->f_ralen; + dst_file->f_rawin = src_file->f_rawin; +#else + dst_file->f_ra = src_file->f_ra; +#endif +} + +static inline void duplicate_sb(struct super_block *dst_sb, + struct super_block *src_sb) +{ + dst_sb->s_blocksize = src_sb->s_blocksize; + dst_sb->s_magic = src_sb->s_magic; + dst_sb->s_blocksize_bits = src_sb->s_blocksize_bits; + dst_sb->s_maxbytes = src_sb->s_maxbytes; +} + +static inline void d_unalloc(struct dentry *dentry) +{ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + list_del(&dentry->d_hash); + INIT_LIST_HEAD(&dentry->d_hash); +#else + hlist_del_init(&dentry->d_hash); +#endif + dput(dentry); /* this will free the dentry memory */ +} + +static inline struct dentry *pre_smfs_dentry(struct dentry *parent_dentry, + struct inode *cache_inode, + struct dentry *dentry) +{ + struct dentry *cache_dentry = NULL; + + cache_dentry = d_alloc(parent_dentry, &dentry->d_name); + if 
(!cache_dentry) + RETURN(NULL); + if (!parent_dentry) + cache_dentry->d_parent = cache_dentry; + if (cache_inode) + d_add(cache_dentry, cache_inode); + RETURN(cache_dentry); +} + +static inline void post_smfs_dentry(struct dentry *cache_dentry) +{ + if (!cache_dentry) + return; + if (cache_dentry->d_inode) + igrab(cache_dentry->d_inode); + d_unalloc(cache_dentry); +} + +static inline int lookup_by_path(char *path, int flags, struct nameidata *nd) +{ + struct dentry *dentry = NULL; + int rc = 0; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + if (path_init(path, flags, nd)) { +#else + if (path_lookup(path, flags, nd)) { +#endif + rc = path_walk(path, nd); + if (rc) + RETURN(rc); + } else { + RETURN(-EINVAL); + } + + dentry = nd->dentry; + + if (!dentry->d_inode || is_bad_inode(dentry->d_inode)) { + path_release(nd); + RETURN(-ENODEV); + } + RETURN(rc); +} + +/*FIXME there should be more conditions in this check*/ + +static inline int smfs_do_rec(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct smfs_super_info *smfs_info = S2SMI(sb); + + if (SMFS_DO_REC(smfs_info) && SMFS_INIT_REC(smfs_info) && + SMFS_DO_INODE_REC(inode)) + return 1; + return 0; +} +static inline int smfs_cache_hook(struct inode *inode) +{ + struct smfs_super_info *smfs_info = I2CSB(inode); + + if (SMFS_CACHE_HOOK(smfs_info) && SMFS_INIT_REC(smfs_info) && + SMFS_INODE_CACHE_HOOK(inode)) + return 1; + else + return 0; +} +/* XXX BUG 3188 -- must return to one set of opcodes */ +#define SMFS_TRANS_OP(inode, op) \ +{ \ + if (smfs_do_rec(inode)) \ + op = op | 0x10; \ + if (smfs_cache_hook(inode)) \ + op = op | 0x20; \ +} + +extern int smfs_start_rec(struct super_block *sb, struct vfsmount *mnt); +extern int smfs_stop_rec(struct super_block *sb); +extern int smfs_write_extents(struct inode *dir, struct dentry *dentry, + unsigned long from, unsigned long num); +extern int smfs_rec_setattr(struct inode *dir, struct dentry *dentry, + struct iattr *attr); +extern int 
smfs_rec_md(struct inode *inode, void * lmm, int lmm_size); +extern int smfs_rec_unpack(struct smfs_proc_args *args, char *record, + char **pbuf, int *opcode); + +#endif /* _LUSTRE_SMFS_H */ diff --git a/lustre/include/linux/lvfs.h b/lustre/include/linux/lvfs.h index b18769f..f51cf73 100644 --- a/lustre/include/linux/lvfs.h +++ b/lustre/include/linux/lvfs.h @@ -14,12 +14,12 @@ #endif /* simple.c */ -struct obd_ucred { - __u32 ouc_fsuid; - __u32 ouc_fsgid; - __u32 ouc_cap; - __u32 ouc_suppgid1; - __u32 ouc_suppgid2; +struct lvfs_ucred { + __u32 luc_fsuid; + __u32 luc_fsgid; + __u32 luc_cap; + __u32 luc_suppgid1; + __u32 luc_suppgid2; }; struct lvfs_callback_ops { @@ -28,11 +28,11 @@ struct lvfs_callback_ops { #define OBD_RUN_CTXT_MAGIC 0xC0FFEEAA #define OBD_CTXT_DEBUG /* development-only debugging */ -struct obd_run_ctxt { +struct lvfs_run_ctxt { struct vfsmount *pwdmnt; struct dentry *pwd; mm_segment_t fs; - struct obd_ucred ouc; + struct lvfs_ucred luc; int ngroups; struct lvfs_callback_ops cb_ops; #ifdef OBD_CTXT_DEBUG @@ -47,15 +47,16 @@ struct obd_run_ctxt { #endif /* lvfs_common.c */ -struct dentry *lvfs_fid2dentry(struct obd_run_ctxt *, __u64, __u32, __u64 ,void *data); +struct dentry *lvfs_fid2dentry(struct lvfs_run_ctxt *, __u64, __u32, __u64 ,void *data); -void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, - struct obd_ucred *cred); -void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, - struct obd_ucred *cred); +void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *cred); +void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *cred); #ifdef __KERNEL__ - +int lvfs_reint(struct super_block *sb, void *r_rec); +int lvfs_undo(struct super_block *sb, void *r_rec); struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix); struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix); int 
lustre_fread(struct file *file, void *buf, int len, loff_t *off); diff --git a/lustre/include/linux/lvfs_linux.h b/lustre/include/linux/lvfs_linux.h index 71fc431..5e125a0 100644 --- a/lustre/include/linux/lvfs_linux.h +++ b/lustre/include/linux/lvfs_linux.h @@ -20,8 +20,8 @@ #define l_filp_open filp_open -struct obd_run_ctxt; -struct l_file *l_dentry_open(struct obd_run_ctxt *, struct l_dentry *, +struct lvfs_run_ctxt; +struct l_file *l_dentry_open(struct lvfs_run_ctxt *, struct l_dentry *, int flags); struct l_linux_dirent { diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index ea6f615..b9b1d2e 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -30,6 +30,7 @@ # include #endif +#include #include #include #include @@ -57,9 +58,10 @@ struct lov_oinfo { /* per-stripe data structure */ struct list_head loi_read_item; int loi_kms_valid:1; - __u64 loi_kms; /* known minimum size */ - __u64 loi_rss; /* recently seen size */ - __u64 loi_mtime; /* recently seen mtime */ + __u64 loi_kms; /* known minimum size */ + __u64 loi_rss; /* recently seen size */ + __u64 loi_mtime; /* recently seen mtime */ + __u64 loi_blocks; /* recently seen blocks */ }; static inline void loi_init(struct lov_oinfo *loi) @@ -123,7 +125,7 @@ struct obd_async_page_ops { int (*ap_make_ready)(void *data, int cmd); int (*ap_refresh_count)(void *data, int cmd); void (*ap_fill_obdo)(void *data, int cmd, struct obdo *oa); - void (*ap_completion)(void *data, int cmd, int rc); + void (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc); }; /* the `oig' is passed down from a caller of obd rw methods. 
the callee @@ -155,8 +157,6 @@ struct obd_histogram { unsigned long oh_buckets[OBD_HIST_MAX]; }; -/* Individual type definitions */ - struct ost_server_data; #define FILTER_SUBDIR_COUNT 32 /* set to zero for no subdirs */ @@ -182,7 +182,7 @@ struct filter_obd { int fo_group_count; struct dentry *fo_dentry_O; /* the "O"bject directory dentry */ - struct dentry **fo_groups; /* dentries for each group dir */ + struct dentry **fo_groups; /* dentries for each group dir */ struct filter_subdirs *fo_subdirs; /* subdir array per group */ __u64 *fo_last_objids; // per-group last created objid struct file **fo_last_objid_files; @@ -194,6 +194,9 @@ struct filter_obd { unsigned long *fo_last_rcvd_slots; __u64 fo_mount_count; + unsigned int fo_destroy_in_progress:1; + struct semaphore fo_create_lock; + struct file_operations *fo_fop; struct inode_operations *fo_iop; struct address_space_operations *fo_aops; @@ -310,7 +313,7 @@ struct mds_obd { struct dentry *mds_logs_dir; struct dentry *mds_objects_dir; struct llog_handle *mds_cfg_llh; -// struct llog_handle *mds_catalog; +// struct llog_handle *mds_catalog; struct obd_device *mds_osc_obd; /* XXX lov_obd */ struct obd_uuid mds_lov_uuid; char *mds_profile; @@ -323,11 +326,12 @@ struct mds_obd { struct file *mds_lov_objid_filp; unsigned long *mds_client_bitmap; struct semaphore mds_orphan_recovery_sem; - + /*add mds num here for real mds and cache mds create + FIXME later will be totally fixed by b_cmd*/ + int mds_num; atomic_t mds_open_count; char *mds_lmv_name; - int mds_num; /* number in cluster */ struct obd_device *mds_lmv_obd; /* XXX lmv_obd */ struct obd_export *mds_lmv_exp; /* XXX lov_exp */ struct ptlrpc_service *mds_create_service; @@ -391,8 +395,10 @@ struct echo_client_obd { }; struct cache_obd { - struct obd_export *cobd_target_exp;/* local connection to target obd */ + struct obd_export *cobd_real_exp;/* local connection to target obd */ struct obd_export *cobd_cache_exp; /* local connection to cache obd */ + int 
refcount; + int cache_on; }; struct lov_tgt_desc { @@ -438,6 +444,15 @@ struct niobuf_local { int rc; }; +struct cache_manager_obd { + struct obd_device *cm_master_obd; /* master lov */ + struct obd_export *cm_master_exp; + struct obd_device *cm_cache_obd; /* cache obdfilter */ + struct obd_export *cm_cache_exp; + int cm_master_group; /* master group*/ + struct cmobd_write_service *cm_write_srv; +}; + /* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */ #define N_LOCAL_TEMP_PAGE 0x10000000 @@ -498,6 +513,7 @@ enum llog_ctxt_id { LLOG_RD1_REPL_CTXT = 9, LLOG_TEST_ORIG_CTXT = 10, LLOG_TEST_REPL_CTXT = 11, + LLOG_REINT_ORIG_CTXT = 12, LLOG_MAX_CTXTS }; @@ -514,9 +530,9 @@ struct obd_device { struct obd_uuid obd_uuid; int obd_minor; - int obd_attached:1, obd_set_up:1, obd_recovering:1, - obd_abort_recovery:1, obd_replayable:1, obd_no_transno:1, - obd_no_recov:1, obd_stopping:1; + unsigned int obd_attached:1, obd_set_up:1, obd_recovering:1, + obd_abort_recovery:1, obd_replayable:1, obd_no_transno:1, + obd_no_recov:1, obd_stopping:1; atomic_t obd_refcount; wait_queue_head_t obd_refcount_waitq; struct proc_dir_entry *obd_proc_entry; @@ -531,8 +547,9 @@ struct obd_device { spinlock_t obd_osfs_lock; struct obd_statfs obd_osfs; unsigned long obd_osfs_age; - struct obd_run_ctxt obd_ctxt; + struct lvfs_run_ctxt obd_lvfs_ctxt; struct obd_llogs obd_llogs; + struct llog_ctxt *obd_llog_ctxt[LLOG_MAX_CTXTS]; struct obd_device *obd_observer; struct obd_export *obd_self_export; @@ -566,6 +583,7 @@ struct obd_device { struct ptlbd_obd ptlbd; struct mgmtcli_obd mgmtcli; struct lmv_obd lmv; + struct cache_manager_obd cmobd; } u; /* Fields used by LProcFS */ unsigned int obd_cntr_base; @@ -676,6 +694,8 @@ struct obd_ops { int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_local *local, struct obd_trans_info *oti, int rc); + int (*o_write_extents)(struct obd_export *exp, struct obd_ioobj *obj, + int niocount, struct niobuf_local *local,int rc); int (*o_enqueue)(struct 
obd_export *, struct lov_stripe_md *, __u32 type, ldlm_policy_data_t *, __u32 mode, int *flags, void *bl_cb, void *cp_cb, void *gl_cb, @@ -703,6 +723,7 @@ struct obd_ops { int (*o_llog_finish)(struct obd_device *, struct obd_llogs *, int); int (*o_llog_connect)(struct obd_device *, struct llogd_conn_body *); + /* metadata-only methods */ int (*o_pin)(struct obd_export *, obd_id ino, __u32 gen, int type, struct obd_client_handle *, int flag); @@ -770,7 +791,26 @@ struct md_ops { struct ptlrpc_request **); int (*m_valid_attrs)(struct obd_export *, struct ll_fid *); struct obd_device * (*m_get_real_obd)(struct obd_export *, - char *name, int len); + char *name, int len); + + int (*m_req2lustre_md)(struct obd_export *exp, + struct ptlrpc_request *req, unsigned int offset, + struct obd_export *osc_exp, struct lustre_md *md); + int (*m_set_open_replay_data)(struct obd_export *exp, + struct obd_client_handle *och, + struct ptlrpc_request *open_req); + int (*m_clear_open_replay_data)(struct obd_export *exp, + struct obd_client_handle *och); + int (*m_store_inode_generation)(struct obd_export *exp, + struct ptlrpc_request *req, int reqoff, + int repoff); + int (*m_set_lock_data)(struct obd_export *exp, __u64 *l, void *data); + + /* + * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line + * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c. + * Also, add a wrapper function in include/linux/obd_class.h. 
+ */ }; static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno, diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 5384a68..086e9b9 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -64,16 +64,16 @@ int class_name2dev(char *name); struct obd_device *class_name2obd(char *name); int class_uuid2dev(struct obd_uuid *uuid); struct obd_device *class_uuid2obd(struct obd_uuid *uuid); -struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, +struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, char * typ_name, struct obd_uuid *grp_uuid); -struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, +struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next); int oig_init(struct obd_io_group **oig); void oig_add_one(struct obd_io_group *oig, struct oig_callback_context *occ); -void oig_complete_one(struct obd_io_group *oig, +void oig_complete_one(struct obd_io_group *oig, struct oig_callback_context *occ, int rc); void oig_release(struct obd_io_group *oig); int oig_wait(struct obd_io_group *oig); @@ -91,9 +91,9 @@ struct config_llog_instance { struct obd_uuid cfg_uuid; ptl_nid_t cfg_local_nid; }; -int class_config_parse_llog(struct llog_ctxt *ctxt, char *name, +int class_config_parse_llog(struct llog_ctxt *ctxt, char *name, struct config_llog_instance *cfg); -int class_config_dump_llog(struct llog_ctxt *ctxt, char *name, +int class_config_dump_llog(struct llog_ctxt *ctxt, char *name, struct config_llog_instance *cfg); struct lustre_profile { @@ -633,7 +633,7 @@ obd_lvfs_fid2dentry(struct obd_export *exp, __u64 id_ino, __u32 gen, __u64 gr) { LASSERT(exp->exp_obd); - return lvfs_fid2dentry(&exp->exp_obd->obd_ctxt, id_ino, gen, gr, + return lvfs_fid2dentry(&exp->exp_obd->obd_lvfs_ctxt, id_ino, gen, gr, exp->exp_obd); } @@ -641,6 +641,9 @@ obd_lvfs_fid2dentry(struct obd_export *exp, __u64 id_ino, __u32 gen, __u64 gr) 
#define time_before(t1, t2) ((long)t2 - (long)t1 > 0) #endif +/* @max_age is the oldest time in jiffies that we accept using a cached data. + * If the cache is older than @max_age we will get a new value from the + * target. Use a value of "jiffies + HZ" to guarantee freshness. */ static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs, unsigned long max_age) { @@ -656,10 +659,12 @@ static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs, CDEBUG(D_SUPER, "osfs %lu, max_age %lu\n", obd->obd_osfs_age, max_age); if (time_before(obd->obd_osfs_age, max_age)) { rc = OBP(obd, statfs)(obd, osfs, max_age); - spin_lock(&obd->obd_osfs_lock); - memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs)); - obd->obd_osfs_age = jiffies; - spin_unlock(&obd->obd_osfs_lock); + if (rc == 0) { + spin_lock(&obd->obd_osfs_lock); + memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs)); + obd->obd_osfs_age = jiffies; + spin_unlock(&obd->obd_osfs_lock); + } } else { CDEBUG(D_SUPER, "using cached obd_statfs data\n"); spin_lock(&obd->obd_osfs_lock); @@ -670,7 +675,7 @@ static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs, } static inline int obd_sync(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, obd_size start, + struct lov_stripe_md *ea, obd_size start, obd_size end) { int rc; @@ -739,11 +744,11 @@ static inline int obd_brw_async(int cmd, struct obd_export *exp, RETURN(rc); } -static inline int obd_prep_async_page(struct obd_export *exp, +static inline int obd_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct page *page, obd_off offset, - struct obd_async_page_ops *ops, + struct lov_oinfo *loi, + struct page *page, obd_off offset, + struct obd_async_page_ops *ops, void *data, void **res) { int ret; @@ -786,16 +791,16 @@ static inline int obd_set_async_flags(struct obd_export *exp, OBD_CHECK_OP(exp->exp_obd, set_async_flags, -EOPNOTSUPP); 
OBD_COUNTER_INCREMENT(exp->exp_obd, set_async_flags); - rc = OBP(exp->exp_obd, set_async_flags)(exp, lsm, loi, cookie, + rc = OBP(exp->exp_obd, set_async_flags)(exp, lsm, loi, cookie, async_flags); RETURN(rc); } -static inline int obd_queue_group_io(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_io_group *oig, - void *cookie, int cmd, obd_off off, +static inline int obd_queue_group_io(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig, + void *cookie, int cmd, obd_off off, int count, obd_flag brw_flags, obd_flag async_flags) { @@ -806,14 +811,14 @@ static inline int obd_queue_group_io(struct obd_export *exp, OBD_COUNTER_INCREMENT(exp->exp_obd, queue_group_io); LASSERT(cmd & OBD_BRW_RWMASK); - rc = OBP(exp->exp_obd, queue_group_io)(exp, lsm, loi, oig, cookie, + rc = OBP(exp->exp_obd, queue_group_io)(exp, lsm, loi, oig, cookie, cmd, off, count, brw_flags, async_flags); RETURN(rc); } -static inline int obd_trigger_group_io(struct obd_export *exp, - struct lov_stripe_md *lsm, +static inline int obd_trigger_group_io(struct obd_export *exp, + struct lov_stripe_md *lsm, struct lov_oinfo *loi, struct obd_io_group *oig) { @@ -873,6 +878,21 @@ static inline int obd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa, RETURN(rc); } +static inline int obd_write_extents(struct obd_export *exp, struct obd_ioobj *obj, + int niocount, struct niobuf_local *local, int rc) +{ + ENTRY; + + /* there are cases when write_extents is not implemented. 
*/ + if (!OBP(exp->exp_obd, write_extents)) + RETURN(0); + + OBD_COUNTER_INCREMENT(exp->exp_obd, write_extents); + + rc = OBP(exp->exp_obd, write_extents)(exp, obj, niocount, local, rc); + RETURN(rc); +} + static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg) { @@ -920,7 +940,7 @@ static inline int obd_match(struct obd_export *exp, struct lov_stripe_md *ea, } static inline int obd_change_cbdata(struct obd_export *exp, - struct lov_stripe_md *lsm, + struct lov_stripe_md *lsm, ldlm_iterator_t it, void *data) { int rc; @@ -1079,8 +1099,8 @@ static inline int md_getstatus(struct obd_export *exp, struct ll_fid *fid) } static inline int md_getattr(struct obd_export *exp, struct ll_fid *fid, - unsigned long valid, unsigned int ea_size, - struct ptlrpc_request **request) + unsigned long valid, unsigned int ea_size, + struct ptlrpc_request **request) { int rc; EXP_CHECK_MD_OP(exp, getattr); @@ -1106,8 +1126,13 @@ static inline int md_change_cbdata_name(struct obd_export *exp, ldlm_iterator_t it, void *data) { int rc; + + /* this seem to be needed only for lmv. 
*/ + if (!MDP(exp->exp_obd, change_cbdata_name)) + return 0; + ENTRY; - EXP_CHECK_MD_OP(exp, change_cbdata_name); + MD_COUNTER_INCREMENT(exp->exp_obd, change_cbdata_name); rc = MDP(exp->exp_obd, change_cbdata_name)(exp, fid, name, namelen, fid2, it, data); @@ -1115,8 +1140,8 @@ static inline int md_change_cbdata_name(struct obd_export *exp, } static inline int md_close(struct obd_export *exp, struct obdo *obdo, - struct obd_client_handle *och, - struct ptlrpc_request **request) + struct obd_client_handle *och, + struct ptlrpc_request **request) { int rc; ENTRY; @@ -1164,9 +1189,9 @@ static inline int md_enqueue(struct obd_export *exp, int lock_type, EXP_CHECK_MD_OP(exp, enqueue); MD_COUNTER_INCREMENT(exp->exp_obd, enqueue); rc = MDP(exp->exp_obd, enqueue)(exp, lock_type, it, lock_mode, - data, lockh, lmm, lmmsize, - cb_completion, cb_blocking, - cb_data); + data, lockh, lmm, lmmsize, + cb_completion, cb_blocking, + cb_data); RETURN(rc); } @@ -1202,7 +1227,7 @@ static inline int md_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt, } static inline int md_link(struct obd_export *exp, struct mdc_op_data *data, - struct ptlrpc_request **request) + struct ptlrpc_request **request) { int rc; ENTRY; @@ -1212,11 +1237,10 @@ static inline int md_link(struct obd_export *exp, struct mdc_op_data *data, RETURN(rc); } -static inline int md_rename(struct obd_export *exp, - struct mdc_op_data *data, - const char *old, int oldlen, - const char *new, int newlen, - struct ptlrpc_request **request) +static inline int md_rename(struct obd_export *exp, struct mdc_op_data *data, + const char *old, int oldlen, + const char *new, int newlen, + struct ptlrpc_request **request) { int rc; ENTRY; @@ -1293,6 +1317,56 @@ static inline int md_valid_attrs(struct obd_export *exp, struct ll_fid *fid) return MDP(exp->exp_obd, valid_attrs)(exp, fid); } +static inline int md_req2lustre_md(struct obd_export *exp, + struct ptlrpc_request *req, + unsigned int offset, + struct obd_export *osc_exp, 
+ struct lustre_md *md) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, req2lustre_md); + MD_COUNTER_INCREMENT(exp->exp_obd, req2lustre_md); + return MDP(exp->exp_obd, req2lustre_md)(exp, req, offset, osc_exp, md); +} + +static inline int md_set_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och, + struct ptlrpc_request *open_req) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, set_open_replay_data); + MD_COUNTER_INCREMENT(exp->exp_obd, set_open_replay_data); + return MDP(exp->exp_obd, set_open_replay_data)(exp, och, open_req); +} + +static inline int md_clear_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, clear_open_replay_data); + MD_COUNTER_INCREMENT(exp->exp_obd, clear_open_replay_data); + return MDP(exp->exp_obd, clear_open_replay_data)(exp, och); +} + +static inline int md_store_inode_generation(struct obd_export *exp, + struct ptlrpc_request *req, + int reqoff, int repoff) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, store_inode_generation); + MD_COUNTER_INCREMENT(exp->exp_obd, store_inode_generation); + return MDP(exp->exp_obd, store_inode_generation)(exp, req, + reqoff, repoff); +} + +static inline int md_set_lock_data(struct obd_export *exp, __u64 *l, void *data) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, set_lock_data); + MD_COUNTER_INCREMENT(exp->exp_obd, set_lock_data); + return MDP(exp->exp_obd, set_lock_data)(exp, l, data); +} + /* OBD Metadata Support */ extern int obd_init_caches(void); @@ -1343,14 +1417,14 @@ typedef __u8 class_uuid_t[16]; void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out); /* lustre_peer.c */ -int lustre_uuid_to_peer(char *uuid, ptl_handle_ni_t *peer_ni, ptl_nid_t *peer_nid); +int lustre_uuid_to_peer(char *uuid, __u32 *peer_nal, ptl_nid_t *peer_nid); int class_add_uuid(char *uuid, __u64 nid, __u32 nal); int class_del_uuid (char *uuid); void class_init_uuidlist(void); void class_exit_uuidlist(void); /* mea.c */ -int mea_name2idx(struct mea *, char *, int); -int raw_name2idx(int, 
const char *, int); +int mea_name2idx(struct mea *mea, char *name, int namelen); +int raw_name2idx(int count, const char *name, int namelen); #endif /* __LINUX_OBD_CLASS_H */ diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index a186aa3..9d7957e 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -113,6 +113,8 @@ extern wait_queue_head_t obd_race_waitq; #define OBD_FAIL_LDLM_BL_CALLBACK 0x305 #define OBD_FAIL_LDLM_CP_CALLBACK 0x306 #define OBD_FAIL_LDLM_GL_CALLBACK 0x307 +#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308 +#define OBD_FAIL_LDLM_ENQUEUE_INTENT_ERR 0x309 #define OBD_FAIL_OSC 0x400 #define OBD_FAIL_OSC_BRW_READ_BULK 0x401 diff --git a/lustre/include/lustre/.cvsignore b/lustre/include/lustre/.cvsignore new file mode 100644 index 0000000..282522d --- /dev/null +++ b/lustre/include/lustre/.cvsignore @@ -0,0 +1,2 @@ +Makefile +Makefile.in diff --git a/lustre/portals/knals/Makefile.am b/lustre/include/lustre/Makefile.am similarity index 59% rename from lustre/portals/knals/Makefile.am rename to lustre/include/lustre/Makefile.am index df6ee5c..a785ada 100644 --- a/lustre/portals/knals/Makefile.am +++ b/lustre/include/lustre/Makefile.am @@ -3,5 +3,7 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution -DIST_SUBDIRS= socknal qswnal gmnal scimacnal ibnal -SUBDIRS= socknal @QSWNAL@ @GMNAL@ @SCIMACNAL@ @IBNAL@ + +pkginclude_HEADERS = lustre_user.h liblustreapi.h + +EXTRA_DIST = $(pkginclude_HEADERS) diff --git a/lustre/include/lustre/liblustreapi.h b/lustre/include/lustre/liblustreapi.h new file mode 100644 index 0000000..350bd09 --- /dev/null +++ b/lustre/include/lustre/liblustreapi.h @@ -0,0 +1,42 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Peter J. 
Braam + * Author: Phil Schwan + * Author: Brian Behlendorf + * Author: Robert Read + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ +#ifndef _LIBLUSTREAPI_H_ +#define _LIBLUSTREAPI_H_ + +#include + +/* liblustreapi.c */ +extern int llapi_file_create(char *name, long stripe_size, int stripe_offset, + int stripe_count, int stripe_pattern); +extern int llapi_file_get_stripe(char *path, struct lov_user_md *lum); +extern int llapi_find(char *path, struct obd_uuid *obduuid, int recursive, + int verbose, int quiet); +extern int llapi_target_check(int num_types, char **obd_types, char *dir); +extern int llapi_catinfo(char *dir, char *keyword, char *node_name); +extern int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count); +extern int llapi_is_lustre_mnttype(char *type); + +#endif diff --git a/lustre/include/linux/lustre_user.h b/lustre/include/lustre/lustre_user.h similarity index 78% rename from lustre/include/linux/lustre_user.h rename to lustre/include/lustre/lustre_user.h index c380fe9..804c163 100644 --- a/lustre/include/linux/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -20,10 +20,14 @@ * * Lustre public user-space interface definitions. 
*/ - #ifndef _LUSTRE_USER_H #define _LUSTRE_USER_H #include +#ifdef __KERNEL__ +#include +#else +#include +#endif #define IOC_MDC_TYPE 'i' #define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) @@ -38,11 +42,13 @@ #define LL_IOC_CW_LOCK _IOW ('f', 158, long) #define LL_IOC_CW_UNLOCK _IOW ('f', 159, long) #define LL_IOC_MDC_MKDIRSTRIPE _IOW ('f', 160, long) +#define LL_IOC_GROUP_LOCK _IOW ('f', 161, long) +#define LL_IOC_GROUP_UNLOCK _IOW ('f', 162, long) -#define O_LOV_DELAY_CREATE 0100000000 /* hopefully this does not conflict */ +#define O_LOV_DELAY_CREATE 0100000000 /* hopefully this does not conflict */ -#define LL_FILE_IGNORE_LOCK 0x00000001 -#define LL_FILE_CW_LOCKED 0x00000002 +#define LL_FILE_IGNORE_LOCK 0x00000001 +#define LL_FILE_GROUP_LOCKED 0x00000002 #define LOV_USER_MAGIC_V1 0x0BD10BD0 #define LOV_USER_MAGIC LOV_USER_MAGIC_V1 @@ -55,9 +61,8 @@ struct lov_user_ost_data_v1 { /* per-stripe data structure */ __u64 l_object_id; /* OST object ID */ __u64 l_object_gr; /* OST object group (creating MDS number) */ - __u32 l_ost_generation; /* generation of this OST index */ - __u16 l_ost_idx; /* OST index in LOV */ - __u16 l_reserved2; + __u32 l_ost_gen; /* generation of this OST index */ + __u32 l_ost_idx; /* OST index in LOV */ } __attribute__((packed)); #define lov_user_md lov_user_md_v1 @@ -84,4 +89,24 @@ extern int op_create_file(char *name, long stripe_size, int stripe_offset, extern int op_create_dir(char *name, int stripe_count); extern int get_file_stripe(char *path, struct lov_user_md *lum); +struct ll_recreate_obj { + __u64 lrc_id; + __u32 lrc_ost_idx; +}; + +struct obd_uuid { + __u8 uuid[40]; +}; + +static inline int obd_uuid_equals(struct obd_uuid *u1, struct obd_uuid *u2) +{ + return strcmp(u1->uuid, u2->uuid) == 0; +} + +static inline void obd_str2uuid(struct obd_uuid *uuid, char *tmp) +{ + strncpy(uuid->uuid, tmp, sizeof(*uuid)); + uuid->uuid[sizeof(*uuid) - 1] = '\0'; +} + #endif /* _LUSTRE_USER_H */ diff --git 
a/lustre/install-sh b/lustre/install-sh deleted file mode 100755 index e9de238..0000000 --- a/lustre/install-sh +++ /dev/null @@ -1,251 +0,0 @@ -#!/bin/sh -# -# install - install a program, script, or datafile -# This comes from X11R5 (mit/util/scripts/install.sh). -# -# Copyright 1991 by the Massachusetts Institute of Technology -# -# Permission to use, copy, modify, distribute, and sell this software and its -# documentation for any purpose is hereby granted without fee, provided that -# the above copyright notice appear in all copies and that both that -# copyright notice and this permission notice appear in supporting -# documentation, and that the name of M.I.T. not be used in advertising or -# publicity pertaining to distribution of the software without specific, -# written prior permission. M.I.T. makes no representations about the -# suitability of this software for any purpose. It is provided "as is" -# without express or implied warranty. -# -# Calling this script install-sh is preferred over install.sh, to prevent -# `make' implicit rules from creating a file called install from it -# when there is no Makefile. -# -# This script is compatible with the BSD install script, but was written -# from scratch. It can only install one file at a time, a restriction -# shared with many OS's install programs. - - -# set DOITPROG to echo to test this script - -# Don't use :- since 4.3BSD and earlier shells don't like it. -doit="${DOITPROG-}" - - -# put in absolute paths if you don't have them in your path; or use env. vars. 
- -mvprog="${MVPROG-mv}" -cpprog="${CPPROG-cp}" -chmodprog="${CHMODPROG-chmod}" -chownprog="${CHOWNPROG-chown}" -chgrpprog="${CHGRPPROG-chgrp}" -stripprog="${STRIPPROG-strip}" -rmprog="${RMPROG-rm}" -mkdirprog="${MKDIRPROG-mkdir}" - -transformbasename="" -transform_arg="" -instcmd="$mvprog" -chmodcmd="$chmodprog 0755" -chowncmd="" -chgrpcmd="" -stripcmd="" -rmcmd="$rmprog -f" -mvcmd="$mvprog" -src="" -dst="" -dir_arg="" - -while [ x"$1" != x ]; do - case $1 in - -c) instcmd="$cpprog" - shift - continue;; - - -d) dir_arg=true - shift - continue;; - - -m) chmodcmd="$chmodprog $2" - shift - shift - continue;; - - -o) chowncmd="$chownprog $2" - shift - shift - continue;; - - -g) chgrpcmd="$chgrpprog $2" - shift - shift - continue;; - - -s) stripcmd="$stripprog" - shift - continue;; - - -t=*) transformarg=`echo $1 | sed 's/-t=//'` - shift - continue;; - - -b=*) transformbasename=`echo $1 | sed 's/-b=//'` - shift - continue;; - - *) if [ x"$src" = x ] - then - src=$1 - else - # this colon is to work around a 386BSD /bin/sh bug - : - dst=$1 - fi - shift - continue;; - esac -done - -if [ x"$src" = x ] -then - echo "install: no input file specified" - exit 1 -else - true -fi - -if [ x"$dir_arg" != x ]; then - dst=$src - src="" - - if [ -d $dst ]; then - instcmd=: - chmodcmd="" - else - instcmd=mkdir - fi -else - -# Waiting for this to be detected by the "$instcmd $src $dsttmp" command -# might cause directories to be created, which would be especially bad -# if $src (and thus $dsttmp) contains '*'. 
- - if [ -f $src -o -d $src ] - then - true - else - echo "install: $src does not exist" - exit 1 - fi - - if [ x"$dst" = x ] - then - echo "install: no destination specified" - exit 1 - else - true - fi - -# If destination is a directory, append the input filename; if your system -# does not like double slashes in filenames, you may need to add some logic - - if [ -d $dst ] - then - dst="$dst"/`basename $src` - else - true - fi -fi - -## this sed command emulates the dirname command -dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` - -# Make sure that the destination directory exists. -# this part is taken from Noah Friedman's mkinstalldirs script - -# Skip lots of stat calls in the usual case. -if [ ! -d "$dstdir" ]; then -defaultIFS=' -' -IFS="${IFS-${defaultIFS}}" - -oIFS="${IFS}" -# Some sh's can't handle IFS=/ for some reason. -IFS='%' -set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` -IFS="${oIFS}" - -pathcomp='' - -while [ $# -ne 0 ] ; do - pathcomp="${pathcomp}${1}" - shift - - if [ ! -d "${pathcomp}" ] ; - then - $mkdirprog "${pathcomp}" - else - true - fi - - pathcomp="${pathcomp}/" -done -fi - -if [ x"$dir_arg" != x ] -then - $doit $instcmd $dst && - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi -else - -# If we're going to rename the final executable, determine the name now. - - if [ x"$transformarg" = x ] - then - dstfile=`basename $dst` - else - dstfile=`basename $dst $transformbasename | - sed $transformarg`$transformbasename - fi - -# don't allow the sed command to completely eliminate the filename - - if [ x"$dstfile" = x ] - then - dstfile=`basename $dst` - else - true - fi - -# Make a temp file name in the proper directory. 
- - dsttmp=$dstdir/#inst.$$# - -# Move or copy the file name to the temp name - - $doit $instcmd $src $dsttmp && - - trap "rm -f ${dsttmp}" 0 && - -# and set any options; do chmod last to preserve setuid bits - -# If any of these fail, we abort the whole thing. If we want to -# ignore errors from any of these, just make sure not to ignore -# errors from the above "$doit $instcmd $src $dsttmp" command. - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && - -# Now rename the file to the real destination. - - $doit $rmcmd -f $dstdir/$dstfile && - $doit $mvcmd $dsttmp $dstdir/$dstfile - -fi && - - -exit 0 diff --git a/lustre/kernel-tests/.cvsignore b/lustre/kernel-tests/.cvsignore new file mode 100644 index 0000000..124a2771 --- /dev/null +++ b/lustre/kernel-tests/.cvsignore @@ -0,0 +1,5 @@ +.*.cmd +.*.flags +*.ko +*.c +.tmp_versions diff --git a/lustre/kernel-tests/Makefile b/lustre/kernel-tests/Makefile new file mode 100644 index 0000000..7360935 --- /dev/null +++ b/lustre/kernel-tests/Makefile @@ -0,0 +1,24 @@ +ifeq ($(PATCHLEVEL),) + +ifneq ($(LUSTRE_LINUX_CONFIG),) +include $(LUSTRE_LINUX_CONFIG) +endif +include Makefile + +else + +ifneq ($(LUSTRE_KERNEL_TEST),) +ifeq ($(PATCHLEVEL),4) +all: $(LUSTRE_KERNEL_TEST) +else +extra-y = $(LUSTRE_KERNEL_TEST) +endif +endif + +obj-m := conftest.o + +ifeq ($(PATCHLEVEL),4) +include $(TOPDIR)/Rules.make +endif + +endif diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.18-chaos-2.4.18-i686-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.18-chaos-2.4.18-i686-smp.config new file mode 100644 index 0000000..db8f485 --- /dev/null +++ b/lustre/kernel_patches/kernel_configs/kernel-2.4.18-chaos-2.4.18-i686-smp.config @@ -0,0 +1,1103 @@ +# +# Automatically generated make 
config: don't edit +# +CONFIG_X86=y +CONFIG_ISA=y +# CONFIG_SBUS is not set +CONFIG_UID16=y + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y + +# +# Processor type and features +# +CONFIG_LOLAT=y +# CONFIG_LOLAT_SYSCTL is not set +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +# CONFIG_MPENTIUMIII is not set +CONFIG_MPENTIUM4=y +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MELAN is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MCYRIXIII is not set +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_XADD=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +# CONFIG_RWSEM_GENERIC_SPINLOCK is not set +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_TSC=y +CONFIG_X86_GOOD_APIC=y +CONFIG_X86_PGE=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_MCE=y +# CONFIG_CPU_FREQ is not set +# CONFIG_USE_IRQ_STACKS is not set +# CONFIG_TOSHIBA is not set +# CONFIG_I8K is not set +CONFIG_MICROCODE=m +CONFIG_X86_MSR=m +CONFIG_X86_CPUID=m +# CONFIG_E820_PROC is not set +# CONFIG_NOHIGHMEM is not set +CONFIG_HIGHMEM4G=y +# CONFIG_HIGHMEM64G is not set +CONFIG_HIGHMEM=y +CONFIG_HIGHIO=y +# CONFIG_MATH_EMULATION is not set +CONFIG_MTRR=y +CONFIG_SMP=y +# CONFIG_MULTIQUAD is not set +CONFIG_HAVE_DEC_LOCK=y + +# +# General setup +# +CONFIG_HZ=100 +CONFIG_NET=y +CONFIG_X86_IO_APIC=y +CONFIG_X86_LOCAL_APIC=y +CONFIG_PCI=y +# CONFIG_PCI_GOBIOS is not set +# CONFIG_PCI_GODIRECT is not set +CONFIG_PCI_GOANY=y +CONFIG_PCI_BIOS=y +CONFIG_PCI_DIRECT=y +CONFIG_PCI_NAMES=y + +# +# Performance-monitoring counters support +# +CONFIG_PERFCTR=m +CONFIG_KPERFCTR=y +# CONFIG_PERFCTR_DEBUG is not set +# CONFIG_PERFCTR_INIT_TESTS is not set 
+CONFIG_PERFCTR_VIRTUAL=y +CONFIG_PERFCTR_GLOBAL=y +# CONFIG_EISA is not set +# CONFIG_MCA is not set +# CONFIG_HOTPLUG is not set +# CONFIG_PCMCIA is not set +# CONFIG_HOTPLUG_PCI is not set +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_SYSCTL=y +CONFIG_KCORE_ELF=y +# CONFIG_KCORE_AOUT is not set +CONFIG_BINFMT_AOUT=m +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +# CONFIG_IKCONFIG is not set +CONFIG_PM=y + +# +# Additional device driver support +# +# CONFIG_CIPE is not set +# CONFIG_CRYPTO_AEP is not set +# CONFIG_MEGARAC is not set +CONFIG_FC_QLA2200=m +CONFIG_FC_QLA2300=m +# CONFIG_SCSI_ISCSI is not set +CONFIG_IBMASM=m +CONFIG_IBMSER=m +CONFIG_NEW_DIGI_EPCA=m +CONFIG_NVIDIA=m +# CONFIG_ACPI is not set +CONFIG_APM=y +CONFIG_APM_IGNORE_USER_SUSPEND=y +# CONFIG_APM_DO_ENABLE is not set +# CONFIG_APM_CPU_IDLE is not set +# CONFIG_APM_DISPLAY_BLANK is not set +CONFIG_APM_RTC_IS_GMT=y +# CONFIG_APM_ALLOW_INTS is not set +# CONFIG_APM_REAL_MODE_POWER_OFF is not set + +# +# Binary emulation of other systems +# +# CONFIG_ABI is not set +# CONFIG_ABI_SVR4 is not set + +# +# Support for foreign binary formats +# +# CONFIG_BINFMT_COFF is not set +# CONFIG_BINFMT_XOUT is not set +# CONFIG_BINFMT_XOUT_X286 is not set + +# +# Memory Technology Devices (MTD) +# +CONFIG_MTD=y +# CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_PARTITIONS is not set +# CONFIG_MTD_CONCAT is not set +# CONFIG_MTD_REDBOOT_PARTS is not set +# CONFIG_MTD_CMDLINE_PARTS is not set + +# +# User Modules And Translation Layers +# +CONFIG_MTD_CHAR=m +# CONFIG_MTD_BLOCK is not set +# CONFIG_MTD_BLOCK_RO is not set +# CONFIG_FTL is not set +# CONFIG_NFTL is not set + +# +# RAM/ROM/Flash chip drivers +# +# CONFIG_MTD_CFI is not set +CONFIG_MTD_JEDECPROBE=y +CONFIG_MTD_GEN_PROBE=y +CONFIG_MTD_CFI_ADV_OPTIONS=y +CONFIG_MTD_CFI_NOSWAP=y +# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set +# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set +CONFIG_MTD_CFI_GEOMETRY=y +CONFIG_MTD_CFI_B1=y +# CONFIG_MTD_CFI_B2 is not set +# 
CONFIG_MTD_CFI_B4 is not set +# CONFIG_MTD_CFI_B8 is not set +CONFIG_MTD_CFI_I1=y +# CONFIG_MTD_CFI_I2 is not set +# CONFIG_MTD_CFI_I4 is not set +# CONFIG_MTD_CFI_I8 is not set +CONFIG_MTD_CFI_INTELEXT=y +CONFIG_MTD_CFI_AMDSTD=y +# CONFIG_MTD_RAM is not set +CONFIG_MTD_ROM=y +# CONFIG_MTD_ABSENT is not set +# CONFIG_MTD_OBSOLETE_CHIPS is not set +# CONFIG_MTD_AMDSTD is not set +# CONFIG_MTD_SHARP is not set +# CONFIG_MTD_JEDEC is not set + +# +# Mapping drivers for chip access +# +# CONFIG_MTD_PHYSMAP is not set +# CONFIG_MTD_PNC2000 is not set +# CONFIG_MTD_SC520CDP is not set +# CONFIG_MTD_NETSC520 is not set +# CONFIG_MTD_SBC_GXX is not set +# CONFIG_MTD_ELAN_104NC is not set +# CONFIG_MTD_DILNETPC is not set +# CONFIG_MTD_MIXMEM is not set +# CONFIG_MTD_OCTAGON is not set +# CONFIG_MTD_VMAX is not set +# CONFIG_MTD_L440GX is not set +# CONFIG_MTD_AMD766ROM is not set +CONFIG_MTD_ICH2ROM=m +# CONFIG_MTD_PCI is not set + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_PMC551 is not set +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_MTDRAM is not set +# CONFIG_MTD_BLKMTD is not set + +# +# Disk-On-Chip Device Drivers +# +# CONFIG_MTD_DOC1000 is not set +# CONFIG_MTD_DOC2000 is not set +# CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOCPROBE is not set + +# +# NAND Flash Device Drivers +# +# CONFIG_MTD_NAND is not set + +# +# Parallel port support +# +CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m +CONFIG_PARPORT_PC_CML1=m +# CONFIG_PARPORT_SERIAL is not set +# CONFIG_PARPORT_PC_FIFO is not set +# CONFIG_PARPORT_PC_SUPERIO is not set +# CONFIG_PARPORT_AMIGA is not set +# CONFIG_PARPORT_MFC3 is not set +# CONFIG_PARPORT_ATARI is not set +# CONFIG_PARPORT_GSC is not set +# CONFIG_PARPORT_SUNBPP is not set +# CONFIG_PARPORT_OTHER is not set +CONFIG_PARPORT_1284=y + +# +# Plug and Play configuration +# +CONFIG_PNP=y +CONFIG_ISAPNP=y +# CONFIG_PNPBIOS is not set + +# +# Block devices +# +CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_XD is not set +# CONFIG_PARIDE is not set +# 
CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_CISS_SCSI_TAPE is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_NBD=m +# CONFIG_BLK_DEV_ENBD is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +# CONFIG_MD_LINEAR is not set +# CONFIG_MD_RAID0 is not set +# CONFIG_MD_RAID1 is not set +# CONFIG_MD_RAID5 is not set +# CONFIG_MD_MULTIPATH is not set +CONFIG_BLK_DEV_LVM=m + +# +# Cryptography support (CryptoAPI) +# +# CONFIG_CRYPTO is not set +# CONFIG_CIPHERS is not set +# CONFIG_CRYPTODEV is not set +# CONFIG_CRYPTOLOOP is not set + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_NETLINK_DEV=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_FILTER=y +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_TUX is not set +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_FWMARK=y +CONFIG_IP_ROUTE_NAT=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_TOS=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_ROUTE_LARGE_TABLES=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +# CONFIG_INET_ECN is not set +CONFIG_SYN_COOKIES=y + +# +# IP: Netfilter Configuration +# +CONFIG_IP_NF_CONNTRACK=m +# CONFIG_IP_NF_FTP is not set +# CONFIG_IP_NF_IRC is not set +# CONFIG_IP_NF_QUEUE is not set +CONFIG_IP_NF_IPTABLES=m +# CONFIG_IP_NF_MATCH_LIMIT is not set +# CONFIG_IP_NF_MATCH_MAC is not set +# CONFIG_IP_NF_MATCH_MARK is not set +# CONFIG_IP_NF_MATCH_MULTIPORT is not set +# CONFIG_IP_NF_MATCH_TOS is not set +# CONFIG_IP_NF_MATCH_AH_ESP is not set +# CONFIG_IP_NF_MATCH_LENGTH is not set +# CONFIG_IP_NF_MATCH_TTL is not set +# CONFIG_IP_NF_MATCH_TCPMSS is not set 
+CONFIG_IP_NF_MATCH_STATE=m +# CONFIG_IP_NF_MATCH_UNCLEAN is not set +CONFIG_IP_NF_MATCH_OWNER=m +CONFIG_IP_NF_FILTER=m +# CONFIG_IP_NF_TARGET_REJECT is not set +# CONFIG_IP_NF_TARGET_MIRROR is not set +# CONFIG_IP_NF_NAT is not set +# CONFIG_IP_NF_MANGLE is not set +CONFIG_IP_NF_TARGET_LOG=m +# CONFIG_IP_NF_TARGET_ULOG is not set +# CONFIG_IP_NF_TARGET_TCPMSS is not set +# CONFIG_IP_NF_ARPTABLES is not set +# CONFIG_IP_NF_COMPAT_IPCHAINS is not set +# CONFIG_IP_NF_COMPAT_IPFWADM is not set + +# +# IP: Virtual Server Configuration +# +# CONFIG_IP_VS is not set +# CONFIG_IPV6 is not set +CONFIG_KHTTPD=m +# CONFIG_ATM is not set +# CONFIG_VLAN_8021Q is not set + +# +# +# +# CONFIG_IPX is not set +# CONFIG_ATALK is not set + +# +# Appletalk devices +# +# CONFIG_DEV_APPLETALK is not set +# CONFIG_DECNET is not set +# CONFIG_BRIDGE is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_LLC is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_FASTROUTE is not set +# CONFIG_NET_HW_FLOWCONTROL is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +CONFIG_NET_PKTGEN=m + +# +# Telephony Support +# +# CONFIG_PHONE is not set +# CONFIG_PHONE_IXJ is not set +# CONFIG_PHONE_IXJ_PCMCIA is not set + +# +# ATA/IDE/MFM/RLL support +# +CONFIG_IDE=y + +# +# IDE, ATA and ATAPI Block devices +# +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_HD_IDE is not set +# CONFIG_BLK_DEV_HD is not set +CONFIG_BLK_DEV_IDEDISK=y +CONFIG_IDEDISK_MULTI_MODE=y +# CONFIG_IDEDISK_STROKE is not set +# CONFIG_BLK_DEV_IDEDISK_VENDOR is not set +# CONFIG_BLK_DEV_IDEDISK_FUJITSU is not set +# CONFIG_BLK_DEV_IDEDISK_IBM is not set +# CONFIG_BLK_DEV_IDEDISK_MAXTOR is not set +# CONFIG_BLK_DEV_IDEDISK_QUANTUM is not set +# CONFIG_BLK_DEV_IDEDISK_SEAGATE is not set +# CONFIG_BLK_DEV_IDEDISK_WD is not set +# 
CONFIG_BLK_DEV_COMMERIAL is not set +# CONFIG_BLK_DEV_TIVO is not set +# CONFIG_BLK_DEV_IDECS is not set +CONFIG_BLK_DEV_IDECD=m +# CONFIG_BLK_DEV_IDETAPE is not set +CONFIG_BLK_DEV_IDEFLOPPY=y +# CONFIG_BLK_DEV_IDESCSI is not set +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +# CONFIG_BLK_DEV_CMD640 is not set +# CONFIG_BLK_DEV_CMD640_ENHANCED is not set +CONFIG_BLK_DEV_ISAPNP=y +# CONFIG_BLK_DEV_RZ1000 is not set +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_IDEPCI_SHARE_IRQ=y +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_PCI_WIP is not set +# CONFIG_BLK_DEV_IDEDMA_TIMEOUT is not set +# CONFIG_IDEDMA_NEW_DRIVE_LISTINGS is not set +CONFIG_BLK_DEV_ADMA=y +CONFIG_BLK_DEV_AEC62XX=y +CONFIG_AEC62XX_TUNING=y +CONFIG_BLK_DEV_ALI15X3=y +# CONFIG_WDC_ALI15X3 is not set +CONFIG_BLK_DEV_AMD74XX=y +# CONFIG_AMD74XX_OVERRIDE is not set +CONFIG_BLK_DEV_CMD64X=y +CONFIG_BLK_DEV_CMD680=y +CONFIG_BLK_DEV_CY82C693=y +CONFIG_BLK_DEV_CS5530=y +CONFIG_BLK_DEV_HPT34X=y +# CONFIG_HPT34X_AUTODMA is not set +CONFIG_BLK_DEV_HPT366=y +CONFIG_BLK_DEV_PIIX=y +CONFIG_PIIX_TUNING=y +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_OPTI621 is not set +# CONFIG_BLK_DEV_ADMA100 is not set +CONFIG_BLK_DEV_PDC202XX=y +# CONFIG_PDC202XX_BURST is not set +# CONFIG_PDC202XX_FORCE is not set +CONFIG_BLK_DEV_SVWKS=y +CONFIG_BLK_DEV_SIS5513=y +CONFIG_BLK_DEV_SLC90E66=y +# CONFIG_BLK_DEV_TRM290 is not set +CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_BLK_DEV_CENATEK=y +# CONFIG_IDE_CHIPSETS is not set +# CONFIG_BLK_DEV_ELEVATOR_NOOP is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_IDEDMA_IVB is not set +# CONFIG_DMA_NONPCI is not set +CONFIG_BLK_DEV_IDE_MODES=y +# CONFIG_BLK_DEV_ATARAID is not set +# CONFIG_BLK_DEV_ATARAID_PDC is not set +# CONFIG_BLK_DEV_ATARAID_HPT is not set + +# +# SCSI support +# +CONFIG_SCSI=y + +# +# SCSI 
support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +CONFIG_SD_EXTRA_DEVS=40 +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=m +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_SR_EXTRA_DEVS=4 +CONFIG_CHR_DEV_SG=m + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_DEBUG_QUEUES is not set +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y + +# +# SCSI low-level drivers +# +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_7000FASST is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AHA152X is not set +# CONFIG_SCSI_AHA1542 is not set +# CONFIG_SCSI_AHA1740 is not set +# CONFIG_SCSI_AACRAID is not set +CONFIG_SCSI_AIC7XXX=y +CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 +CONFIG_AIC7XXX_RESET_DELAY_MS=15000 +# CONFIG_AIC7XXX_PROBE_EISA_VL is not set +# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set +# CONFIG_SCSI_AIC79XX is not set +CONFIG_SCSI_DPT_I2O=y +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_IN2000 is not set +# CONFIG_SCSI_AM53C974 is not set +# CONFIG_SCSI_MEGARAID is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_CPQFCTS is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_DTC3280 is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_EATA_DMA is not set +# CONFIG_SCSI_EATA_PIO is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_GENERIC_NCR5380 is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_PPA is not set +# CONFIG_SCSI_IMM is not set +# CONFIG_SCSI_NCR53C406A is not set +# CONFIG_SCSI_NCR53C7xx is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_NCR53C8XX is not set +# CONFIG_SCSI_SYM53C8XX is not set +# CONFIG_SCSI_PAS16 is not set +# CONFIG_SCSI_PCI2000 is not set +# CONFIG_SCSI_PCI2220I is not set +# CONFIG_SCSI_PSI240I is not set +# CONFIG_SCSI_QLOGIC_FAS is not set +# CONFIG_SCSI_QLOGIC_ISP is not set +# 
CONFIG_SCSI_QLOGIC_FC is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_NEWISP is not set +# CONFIG_SCSI_SEAGATE is not set +# CONFIG_SCSI_SIM710 is not set +# CONFIG_SCSI_SYM53C416 is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_T128 is not set +# CONFIG_SCSI_U14_34F is not set +# CONFIG_SCSI_ULTRASTOR is not set +CONFIG_SCSI_DEBUG=m + +# +# Fusion MPT device support +# +CONFIG_FUSION=y +CONFIG_FUSION_BOOT=y +CONFIG_FUSION_ISENSE=m +CONFIG_FUSION_CTL=m +# CONFIG_FUSION_LAN is not set + +# +# IEEE 1394 (FireWire) support (EXPERIMENTAL) +# +# CONFIG_IEEE1394 is not set + +# +# I2O device support +# +# CONFIG_I2O is not set +# CONFIG_I2O_PCI is not set +# CONFIG_I2O_BLOCK is not set +# CONFIG_I2O_LAN is not set +# CONFIG_I2O_SCSI is not set +# CONFIG_I2O_PROC is not set + +# +# Network device support +# +CONFIG_NETDEVICES=y + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set +CONFIG_DUMMY=m +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m +# CONFIG_ETHERTAP is not set +# CONFIG_NET_SB1000 is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +# CONFIG_SUNLANCE is not set +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNBMAC is not set +# CONFIG_SUNQE is not set +# CONFIG_SUNGEM is not set +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_LANCE is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_NET_VENDOR_RACAL is not set +# CONFIG_AT1700 is not set +# CONFIG_DEPCA is not set +# CONFIG_HP100 is not set +# CONFIG_NET_ISA is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_AC3200 is not set +# CONFIG_APRICOT is not set +# CONFIG_CS89x0 is not set +CONFIG_TULIP=m +# CONFIG_TC35815 is not set +# CONFIG_TULIP_MWI is not set +CONFIG_TULIP_MMIO=y +CONFIG_DE4X5=m +# CONFIG_DGRS is not set +# CONFIG_DM9102 is not set +CONFIG_EEPRO100=m +CONFIG_NET_E100=m +# CONFIG_LNE390 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is 
not set +# CONFIG_NE3210 is not set +# CONFIG_ES3210 is not set +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +# CONFIG_8139TOO_8129 is not set +# CONFIG_8139_NEW_RX_RESET is not set +# CONFIG_SIS900 is not set +# CONFIG_SIS900_OLD is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_VIA_RHINE_MMIO is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_NET_POCKET is not set + +# +# Ethernet (1000 Mbit) +# +CONFIG_ACENIC=m +# CONFIG_ACENIC_OMIT_TIGON_I is not set +# CONFIG_DL2K is not set +# CONFIG_MYRI_SBUS is not set +CONFIG_NS83820=m +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_SK98LIN is not set +CONFIG_NET_BROADCOM=m +CONFIG_TIGON3=m +CONFIG_NET_E1000=m +# CONFIG_FDDI is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_HIPPI is not set +# CONFIG_PLIP is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set +# CONFIG_NET_FC is not set +# CONFIG_RCPCI is not set +# CONFIG_SHAPER is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set + +# +# Quadrics Supercomputers +# + +# +# QsNet +# +CONFIG_QUADRICS=y +CONFIG_QSNETMOD=m +CONFIG_ELAN3MOD=m +CONFIG_EPMOD=m +CONFIG_EIPMOD=m +CONFIG_RMSMOD=m +CONFIG_JTAG=m + +# +# QsNet II +# + +# +# Amateur Radio support +# +# CONFIG_HAMRADIO is not set + +# +# IrDA (infrared) support +# +# CONFIG_IRDA is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set +CONFIG_KALLSYMS=y + +# +# Old CD-ROM drivers (not SCSI, not IDE) +# +# CONFIG_CD_NO_IDESCSI is not set + +# +# Input core support +# +# CONFIG_INPUT is not set +# CONFIG_INPUT_KEYBDEV is not set +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_EVDEV is not set + +# +# Character devices +# +CONFIG_ECC=m 
+CONFIG_CHAOSTEST=m +CONFIG_P4THERM=m +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_SERIAL=y +CONFIG_SERIAL_CONSOLE=y +CONFIG_SERIAL_EXTENDED=y +# CONFIG_SERIAL_MANY_PORTS is not set +CONFIG_SERIAL_SHARE_IRQ=y +# CONFIG_SERIAL_DETECT_IRQ is not set +# CONFIG_SERIAL_MULTIPORT is not set +# CONFIG_HUB6 is not set +# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_UNIX98_PTYS=y +CONFIG_UNIX98_PTY_COUNT=2048 +# CONFIG_PRINTER is not set +# CONFIG_PPDEV is not set + +# +# I2C support +# +CONFIG_I2C=y +# CONFIG_I2C_ALGOBIT is not set +# CONFIG_I2C_ALGOPCF is not set +CONFIG_I2C_MAINBOARD=y +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_HYDRA is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_TSUNAMI is not set +CONFIG_I2C_I801=m +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set +CONFIG_I2C_ISA=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_PROC=y + +# +# Hardware sensors support +# +CONFIG_SENSORS=y +CONFIG_SENSORS_ADM1021=m +# CONFIG_SENSORS_ADM1024 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_FSCPOS is not set +# CONFIG_SENSORS_FSCSCY is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_MAXILIFE is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_MTP008 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +CONFIG_SENSORS_LM87=m +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_THMC50 is not set +# CONFIG_SENSORS_VIA686A is not set +CONFIG_SENSORS_W83781D=y +# CONFIG_SENSORS_OTHER is not set + +# +# Mice +# +# CONFIG_BUSMOUSE is not set +CONFIG_MOUSE=y +CONFIG_PSMOUSE=y +# CONFIG_82C710_MOUSE is not set +# CONFIG_PC110_PAD is not set +# CONFIG_MK712_MOUSE is not set + +# +# Joysticks +# +# 
CONFIG_INPUT_GAMEPORT is not set + +# +# Input core support is needed for gameports +# + +# +# Input core support is needed for joysticks +# +# CONFIG_QIC02_TAPE is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_AMD_RNG is not set +# CONFIG_INTEL_RNG is not set +# CONFIG_AMD_PM768 is not set +# CONFIG_NVRAM is not set +CONFIG_RTC=y +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_SONYPI is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_FTAPE is not set +CONFIG_AGP=m +CONFIG_AGP_INTEL=y +# CONFIG_AGP_I810 is not set +# CONFIG_AGP_VIA is not set +# CONFIG_AGP_AMD is not set +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_ALI is not set +# CONFIG_AGP_SWORKS is not set +# CONFIG_DRM is not set +# CONFIG_MWAVE is not set +# CONFIG_BATTERY_GERICOM is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# Crypto Hardware support +# +# CONFIG_CRYPTO is not set + +# +# File systems +# +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +# CONFIG_ADFS_FS is not set +# CONFIG_AFS_FS is not set +# CONFIG_ADFS_FS_RW is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BEFS_DEBUG is not set +# CONFIG_BFS_FS is not set +CONFIG_EXT3_FS=y +CONFIG_JBD=y +CONFIG_JBD_DEBUG=y +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_UMSDOS_FS=m +CONFIG_VFAT_FS=m +# CONFIG_EFS_FS is not set +# CONFIG_JFFS_FS is not set +# CONFIG_JFFS2_FS is not set +CONFIG_CRAMFS=y +CONFIG_TMPFS=y +CONFIG_RAMFS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +# CONFIG_JFS_FS is not set +# CONFIG_JFS_DEBUG is not set +# CONFIG_JFS_STATISTICS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_NTFS_FS is not set +# CONFIG_NTFS_RW is not set +# CONFIG_HPFS_FS is not set 
+CONFIG_PROC_FS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVFS_MOUNT is not set +# CONFIG_DEVFS_DEBUG is not set +CONFIG_DEVPTS_FS=y +# CONFIG_QNX4FS_FS is not set +# CONFIG_QNX4FS_RW is not set +CONFIG_ROMFS_FS=m +CONFIG_EXT2_FS=y +# CONFIG_SYSV_FS is not set +# CONFIG_UDF_FS is not set +# CONFIG_UDF_RW is not set +# CONFIG_UFS_FS is not set +# CONFIG_UFS_FS_WRITE is not set + +# +# Network File Systems +# +# CONFIG_CODA_FS is not set +# CONFIG_INTERMEZZO_FS is not set +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +# CONFIG_ROOT_NFS is not set +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +CONFIG_NFSD_TCP=y +CONFIG_SUNRPC=m +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +# CONFIG_SMB_FS is not set +# CONFIG_NCP_FS is not set +# CONFIG_NCPFS_PACKET_SIGNING is not set +# CONFIG_NCPFS_IOCTL_LOCKING is not set +# CONFIG_NCPFS_STRONG is not set +# CONFIG_NCPFS_NFS_NS is not set +# CONFIG_NCPFS_OS2_NS is not set +# CONFIG_NCPFS_SMALLDOS is not set +# CONFIG_NCPFS_NLS is not set +# CONFIG_NCPFS_EXTRAS is not set +CONFIG_PFS_FS=m +CONFIG_PFS_DIRECTIO=m +CONFIG_ZISOFS_FS=y + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +CONFIG_OSF_PARTITION=y +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y +CONFIG_MSDOS_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +CONFIG_SUN_PARTITION=y +# CONFIG_SMB_NLS is not set +CONFIG_NLS=y + +# +# Native Language Support +# +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=m +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +CONFIG_NLS_CODEPAGE_850=m +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# 
CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +CONFIG_NLS_ISO8859_1=m +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set + +# +# Console drivers +# +CONFIG_VGA_CONSOLE=y +CONFIG_VIDEO_SELECT=y +# CONFIG_VIDEO_IGNORE_BAD_MODE is not set +# CONFIG_MDA_CONSOLE is not set + +# +# Frame-buffer support +# +# CONFIG_FB is not set +# CONFIG_SPEAKUP is not set + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +# CONFIG_USB is not set + +# +# Bluetooth support +# +# CONFIG_BLUEZ is not set + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +CONFIG_FRAME_POINTER=y +CONFIG_STACK_TRACE_SCAN=y +CONFIG_STACK_TRACE_FPTR=y +CONFIG_STACK_TRACE_PARAM_COUNT=4 +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_IOVIRT is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_MCL_COREDUMP=y +CONFIG_BOOTIMG=y +# CONFIG_OPROFILE is not set +# CONFIG_NOBIGSTACK is not set +CONFIG_STACK_SIZE_16KB=y +# CONFIG_STACK_SIZE_32KB is not set +# CONFIG_STACK_SIZE_64KB is not set +CONFIG_STACK_SIZE_SHIFT=2 + +# +# Library routines +# +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y diff --git 
a/lustre/kernel_patches/kernel_configs/kernel-2.4.18-chaos-2.4.18-i686.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.18-chaos-2.4.18-i686.config new file mode 100644 index 0000000..db8f485 --- /dev/null +++ b/lustre/kernel_patches/kernel_configs/kernel-2.4.18-chaos-2.4.18-i686.config @@ -0,0 +1,1103 @@ +# +# Automatically generated make config: don't edit +# +CONFIG_X86=y +CONFIG_ISA=y +# CONFIG_SBUS is not set +CONFIG_UID16=y + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y + +# +# Processor type and features +# +CONFIG_LOLAT=y +# CONFIG_LOLAT_SYSCTL is not set +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +# CONFIG_MPENTIUMIII is not set +CONFIG_MPENTIUM4=y +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MELAN is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MCYRIXIII is not set +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_XADD=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +# CONFIG_RWSEM_GENERIC_SPINLOCK is not set +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_TSC=y +CONFIG_X86_GOOD_APIC=y +CONFIG_X86_PGE=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_MCE=y +# CONFIG_CPU_FREQ is not set +# CONFIG_USE_IRQ_STACKS is not set +# CONFIG_TOSHIBA is not set +# CONFIG_I8K is not set +CONFIG_MICROCODE=m +CONFIG_X86_MSR=m +CONFIG_X86_CPUID=m +# CONFIG_E820_PROC is not set +# CONFIG_NOHIGHMEM is not set +CONFIG_HIGHMEM4G=y +# CONFIG_HIGHMEM64G is not set +CONFIG_HIGHMEM=y +CONFIG_HIGHIO=y +# CONFIG_MATH_EMULATION is not set +CONFIG_MTRR=y +CONFIG_SMP=y +# CONFIG_MULTIQUAD is not set +CONFIG_HAVE_DEC_LOCK=y + +# +# General setup +# +CONFIG_HZ=100 +CONFIG_NET=y 
+CONFIG_X86_IO_APIC=y +CONFIG_X86_LOCAL_APIC=y +CONFIG_PCI=y +# CONFIG_PCI_GOBIOS is not set +# CONFIG_PCI_GODIRECT is not set +CONFIG_PCI_GOANY=y +CONFIG_PCI_BIOS=y +CONFIG_PCI_DIRECT=y +CONFIG_PCI_NAMES=y + +# +# Performance-monitoring counters support +# +CONFIG_PERFCTR=m +CONFIG_KPERFCTR=y +# CONFIG_PERFCTR_DEBUG is not set +# CONFIG_PERFCTR_INIT_TESTS is not set +CONFIG_PERFCTR_VIRTUAL=y +CONFIG_PERFCTR_GLOBAL=y +# CONFIG_EISA is not set +# CONFIG_MCA is not set +# CONFIG_HOTPLUG is not set +# CONFIG_PCMCIA is not set +# CONFIG_HOTPLUG_PCI is not set +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_SYSCTL=y +CONFIG_KCORE_ELF=y +# CONFIG_KCORE_AOUT is not set +CONFIG_BINFMT_AOUT=m +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +# CONFIG_IKCONFIG is not set +CONFIG_PM=y + +# +# Additional device driver support +# +# CONFIG_CIPE is not set +# CONFIG_CRYPTO_AEP is not set +# CONFIG_MEGARAC is not set +CONFIG_FC_QLA2200=m +CONFIG_FC_QLA2300=m +# CONFIG_SCSI_ISCSI is not set +CONFIG_IBMASM=m +CONFIG_IBMSER=m +CONFIG_NEW_DIGI_EPCA=m +CONFIG_NVIDIA=m +# CONFIG_ACPI is not set +CONFIG_APM=y +CONFIG_APM_IGNORE_USER_SUSPEND=y +# CONFIG_APM_DO_ENABLE is not set +# CONFIG_APM_CPU_IDLE is not set +# CONFIG_APM_DISPLAY_BLANK is not set +CONFIG_APM_RTC_IS_GMT=y +# CONFIG_APM_ALLOW_INTS is not set +# CONFIG_APM_REAL_MODE_POWER_OFF is not set + +# +# Binary emulation of other systems +# +# CONFIG_ABI is not set +# CONFIG_ABI_SVR4 is not set + +# +# Support for foreign binary formats +# +# CONFIG_BINFMT_COFF is not set +# CONFIG_BINFMT_XOUT is not set +# CONFIG_BINFMT_XOUT_X286 is not set + +# +# Memory Technology Devices (MTD) +# +CONFIG_MTD=y +# CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_PARTITIONS is not set +# CONFIG_MTD_CONCAT is not set +# CONFIG_MTD_REDBOOT_PARTS is not set +# CONFIG_MTD_CMDLINE_PARTS is not set + +# +# User Modules And Translation Layers +# +CONFIG_MTD_CHAR=m +# CONFIG_MTD_BLOCK is not set +# CONFIG_MTD_BLOCK_RO is not set +# CONFIG_FTL is not set +# 
CONFIG_NFTL is not set + +# +# RAM/ROM/Flash chip drivers +# +# CONFIG_MTD_CFI is not set +CONFIG_MTD_JEDECPROBE=y +CONFIG_MTD_GEN_PROBE=y +CONFIG_MTD_CFI_ADV_OPTIONS=y +CONFIG_MTD_CFI_NOSWAP=y +# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set +# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set +CONFIG_MTD_CFI_GEOMETRY=y +CONFIG_MTD_CFI_B1=y +# CONFIG_MTD_CFI_B2 is not set +# CONFIG_MTD_CFI_B4 is not set +# CONFIG_MTD_CFI_B8 is not set +CONFIG_MTD_CFI_I1=y +# CONFIG_MTD_CFI_I2 is not set +# CONFIG_MTD_CFI_I4 is not set +# CONFIG_MTD_CFI_I8 is not set +CONFIG_MTD_CFI_INTELEXT=y +CONFIG_MTD_CFI_AMDSTD=y +# CONFIG_MTD_RAM is not set +CONFIG_MTD_ROM=y +# CONFIG_MTD_ABSENT is not set +# CONFIG_MTD_OBSOLETE_CHIPS is not set +# CONFIG_MTD_AMDSTD is not set +# CONFIG_MTD_SHARP is not set +# CONFIG_MTD_JEDEC is not set + +# +# Mapping drivers for chip access +# +# CONFIG_MTD_PHYSMAP is not set +# CONFIG_MTD_PNC2000 is not set +# CONFIG_MTD_SC520CDP is not set +# CONFIG_MTD_NETSC520 is not set +# CONFIG_MTD_SBC_GXX is not set +# CONFIG_MTD_ELAN_104NC is not set +# CONFIG_MTD_DILNETPC is not set +# CONFIG_MTD_MIXMEM is not set +# CONFIG_MTD_OCTAGON is not set +# CONFIG_MTD_VMAX is not set +# CONFIG_MTD_L440GX is not set +# CONFIG_MTD_AMD766ROM is not set +CONFIG_MTD_ICH2ROM=m +# CONFIG_MTD_PCI is not set + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_PMC551 is not set +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_MTDRAM is not set +# CONFIG_MTD_BLKMTD is not set + +# +# Disk-On-Chip Device Drivers +# +# CONFIG_MTD_DOC1000 is not set +# CONFIG_MTD_DOC2000 is not set +# CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOCPROBE is not set + +# +# NAND Flash Device Drivers +# +# CONFIG_MTD_NAND is not set + +# +# Parallel port support +# +CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m +CONFIG_PARPORT_PC_CML1=m +# CONFIG_PARPORT_SERIAL is not set +# CONFIG_PARPORT_PC_FIFO is not set +# CONFIG_PARPORT_PC_SUPERIO is not set +# CONFIG_PARPORT_AMIGA is not set +# CONFIG_PARPORT_MFC3 is not set +# 
CONFIG_PARPORT_ATARI is not set +# CONFIG_PARPORT_GSC is not set +# CONFIG_PARPORT_SUNBPP is not set +# CONFIG_PARPORT_OTHER is not set +CONFIG_PARPORT_1284=y + +# +# Plug and Play configuration +# +CONFIG_PNP=y +CONFIG_ISAPNP=y +# CONFIG_PNPBIOS is not set + +# +# Block devices +# +CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_XD is not set +# CONFIG_PARIDE is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_CISS_SCSI_TAPE is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_NBD=m +# CONFIG_BLK_DEV_ENBD is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +# CONFIG_MD_LINEAR is not set +# CONFIG_MD_RAID0 is not set +# CONFIG_MD_RAID1 is not set +# CONFIG_MD_RAID5 is not set +# CONFIG_MD_MULTIPATH is not set +CONFIG_BLK_DEV_LVM=m + +# +# Cryptography support (CryptoAPI) +# +# CONFIG_CRYPTO is not set +# CONFIG_CIPHERS is not set +# CONFIG_CRYPTODEV is not set +# CONFIG_CRYPTOLOOP is not set + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_NETLINK_DEV=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_FILTER=y +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_TUX is not set +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_FWMARK=y +CONFIG_IP_ROUTE_NAT=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_TOS=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_ROUTE_LARGE_TABLES=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +# CONFIG_INET_ECN is not set +CONFIG_SYN_COOKIES=y + +# +# IP: Netfilter Configuration +# +CONFIG_IP_NF_CONNTRACK=m +# CONFIG_IP_NF_FTP is not set +# CONFIG_IP_NF_IRC is not set +# CONFIG_IP_NF_QUEUE is not set +CONFIG_IP_NF_IPTABLES=m +# 
CONFIG_IP_NF_MATCH_LIMIT is not set +# CONFIG_IP_NF_MATCH_MAC is not set +# CONFIG_IP_NF_MATCH_MARK is not set +# CONFIG_IP_NF_MATCH_MULTIPORT is not set +# CONFIG_IP_NF_MATCH_TOS is not set +# CONFIG_IP_NF_MATCH_AH_ESP is not set +# CONFIG_IP_NF_MATCH_LENGTH is not set +# CONFIG_IP_NF_MATCH_TTL is not set +# CONFIG_IP_NF_MATCH_TCPMSS is not set +CONFIG_IP_NF_MATCH_STATE=m +# CONFIG_IP_NF_MATCH_UNCLEAN is not set +CONFIG_IP_NF_MATCH_OWNER=m +CONFIG_IP_NF_FILTER=m +# CONFIG_IP_NF_TARGET_REJECT is not set +# CONFIG_IP_NF_TARGET_MIRROR is not set +# CONFIG_IP_NF_NAT is not set +# CONFIG_IP_NF_MANGLE is not set +CONFIG_IP_NF_TARGET_LOG=m +# CONFIG_IP_NF_TARGET_ULOG is not set +# CONFIG_IP_NF_TARGET_TCPMSS is not set +# CONFIG_IP_NF_ARPTABLES is not set +# CONFIG_IP_NF_COMPAT_IPCHAINS is not set +# CONFIG_IP_NF_COMPAT_IPFWADM is not set + +# +# IP: Virtual Server Configuration +# +# CONFIG_IP_VS is not set +# CONFIG_IPV6 is not set +CONFIG_KHTTPD=m +# CONFIG_ATM is not set +# CONFIG_VLAN_8021Q is not set + +# +# +# +# CONFIG_IPX is not set +# CONFIG_ATALK is not set + +# +# Appletalk devices +# +# CONFIG_DEV_APPLETALK is not set +# CONFIG_DECNET is not set +# CONFIG_BRIDGE is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_LLC is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_FASTROUTE is not set +# CONFIG_NET_HW_FLOWCONTROL is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +CONFIG_NET_PKTGEN=m + +# +# Telephony Support +# +# CONFIG_PHONE is not set +# CONFIG_PHONE_IXJ is not set +# CONFIG_PHONE_IXJ_PCMCIA is not set + +# +# ATA/IDE/MFM/RLL support +# +CONFIG_IDE=y + +# +# IDE, ATA and ATAPI Block devices +# +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_HD_IDE is not set +# CONFIG_BLK_DEV_HD is not set +CONFIG_BLK_DEV_IDEDISK=y +CONFIG_IDEDISK_MULTI_MODE=y +# 
CONFIG_IDEDISK_STROKE is not set +# CONFIG_BLK_DEV_IDEDISK_VENDOR is not set +# CONFIG_BLK_DEV_IDEDISK_FUJITSU is not set +# CONFIG_BLK_DEV_IDEDISK_IBM is not set +# CONFIG_BLK_DEV_IDEDISK_MAXTOR is not set +# CONFIG_BLK_DEV_IDEDISK_QUANTUM is not set +# CONFIG_BLK_DEV_IDEDISK_SEAGATE is not set +# CONFIG_BLK_DEV_IDEDISK_WD is not set +# CONFIG_BLK_DEV_COMMERIAL is not set +# CONFIG_BLK_DEV_TIVO is not set +# CONFIG_BLK_DEV_IDECS is not set +CONFIG_BLK_DEV_IDECD=m +# CONFIG_BLK_DEV_IDETAPE is not set +CONFIG_BLK_DEV_IDEFLOPPY=y +# CONFIG_BLK_DEV_IDESCSI is not set +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +# CONFIG_BLK_DEV_CMD640 is not set +# CONFIG_BLK_DEV_CMD640_ENHANCED is not set +CONFIG_BLK_DEV_ISAPNP=y +# CONFIG_BLK_DEV_RZ1000 is not set +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_IDEPCI_SHARE_IRQ=y +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_PCI_WIP is not set +# CONFIG_BLK_DEV_IDEDMA_TIMEOUT is not set +# CONFIG_IDEDMA_NEW_DRIVE_LISTINGS is not set +CONFIG_BLK_DEV_ADMA=y +CONFIG_BLK_DEV_AEC62XX=y +CONFIG_AEC62XX_TUNING=y +CONFIG_BLK_DEV_ALI15X3=y +# CONFIG_WDC_ALI15X3 is not set +CONFIG_BLK_DEV_AMD74XX=y +# CONFIG_AMD74XX_OVERRIDE is not set +CONFIG_BLK_DEV_CMD64X=y +CONFIG_BLK_DEV_CMD680=y +CONFIG_BLK_DEV_CY82C693=y +CONFIG_BLK_DEV_CS5530=y +CONFIG_BLK_DEV_HPT34X=y +# CONFIG_HPT34X_AUTODMA is not set +CONFIG_BLK_DEV_HPT366=y +CONFIG_BLK_DEV_PIIX=y +CONFIG_PIIX_TUNING=y +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_OPTI621 is not set +# CONFIG_BLK_DEV_ADMA100 is not set +CONFIG_BLK_DEV_PDC202XX=y +# CONFIG_PDC202XX_BURST is not set +# CONFIG_PDC202XX_FORCE is not set +CONFIG_BLK_DEV_SVWKS=y +CONFIG_BLK_DEV_SIS5513=y +CONFIG_BLK_DEV_SLC90E66=y +# CONFIG_BLK_DEV_TRM290 is not set +CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_BLK_DEV_CENATEK=y +# CONFIG_IDE_CHIPSETS is 
not set +# CONFIG_BLK_DEV_ELEVATOR_NOOP is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_IDEDMA_IVB is not set +# CONFIG_DMA_NONPCI is not set +CONFIG_BLK_DEV_IDE_MODES=y +# CONFIG_BLK_DEV_ATARAID is not set +# CONFIG_BLK_DEV_ATARAID_PDC is not set +# CONFIG_BLK_DEV_ATARAID_HPT is not set + +# +# SCSI support +# +CONFIG_SCSI=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +CONFIG_SD_EXTRA_DEVS=40 +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=m +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_SR_EXTRA_DEVS=4 +CONFIG_CHR_DEV_SG=m + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_DEBUG_QUEUES is not set +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y + +# +# SCSI low-level drivers +# +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_7000FASST is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AHA152X is not set +# CONFIG_SCSI_AHA1542 is not set +# CONFIG_SCSI_AHA1740 is not set +# CONFIG_SCSI_AACRAID is not set +CONFIG_SCSI_AIC7XXX=y +CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 +CONFIG_AIC7XXX_RESET_DELAY_MS=15000 +# CONFIG_AIC7XXX_PROBE_EISA_VL is not set +# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set +# CONFIG_SCSI_AIC79XX is not set +CONFIG_SCSI_DPT_I2O=y +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_IN2000 is not set +# CONFIG_SCSI_AM53C974 is not set +# CONFIG_SCSI_MEGARAID is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_CPQFCTS is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_DTC3280 is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_EATA_DMA is not set +# CONFIG_SCSI_EATA_PIO is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_GENERIC_NCR5380 is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_PPA is not set +# CONFIG_SCSI_IMM is not set +# CONFIG_SCSI_NCR53C406A is not set +# CONFIG_SCSI_NCR53C7xx is not set 
+# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_NCR53C8XX is not set +# CONFIG_SCSI_SYM53C8XX is not set +# CONFIG_SCSI_PAS16 is not set +# CONFIG_SCSI_PCI2000 is not set +# CONFIG_SCSI_PCI2220I is not set +# CONFIG_SCSI_PSI240I is not set +# CONFIG_SCSI_QLOGIC_FAS is not set +# CONFIG_SCSI_QLOGIC_ISP is not set +# CONFIG_SCSI_QLOGIC_FC is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_NEWISP is not set +# CONFIG_SCSI_SEAGATE is not set +# CONFIG_SCSI_SIM710 is not set +# CONFIG_SCSI_SYM53C416 is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_T128 is not set +# CONFIG_SCSI_U14_34F is not set +# CONFIG_SCSI_ULTRASTOR is not set +CONFIG_SCSI_DEBUG=m + +# +# Fusion MPT device support +# +CONFIG_FUSION=y +CONFIG_FUSION_BOOT=y +CONFIG_FUSION_ISENSE=m +CONFIG_FUSION_CTL=m +# CONFIG_FUSION_LAN is not set + +# +# IEEE 1394 (FireWire) support (EXPERIMENTAL) +# +# CONFIG_IEEE1394 is not set + +# +# I2O device support +# +# CONFIG_I2O is not set +# CONFIG_I2O_PCI is not set +# CONFIG_I2O_BLOCK is not set +# CONFIG_I2O_LAN is not set +# CONFIG_I2O_SCSI is not set +# CONFIG_I2O_PROC is not set + +# +# Network device support +# +CONFIG_NETDEVICES=y + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set +CONFIG_DUMMY=m +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m +# CONFIG_ETHERTAP is not set +# CONFIG_NET_SB1000 is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +# CONFIG_SUNLANCE is not set +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNBMAC is not set +# CONFIG_SUNQE is not set +# CONFIG_SUNGEM is not set +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_LANCE is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_NET_VENDOR_RACAL is not set +# CONFIG_AT1700 is not set +# CONFIG_DEPCA is not set +# CONFIG_HP100 is not set +# CONFIG_NET_ISA is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_AC3200 is not set +# CONFIG_APRICOT is not set +# CONFIG_CS89x0 
is not set +CONFIG_TULIP=m +# CONFIG_TC35815 is not set +# CONFIG_TULIP_MWI is not set +CONFIG_TULIP_MMIO=y +CONFIG_DE4X5=m +# CONFIG_DGRS is not set +# CONFIG_DM9102 is not set +CONFIG_EEPRO100=m +CONFIG_NET_E100=m +# CONFIG_LNE390 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +# CONFIG_NE3210 is not set +# CONFIG_ES3210 is not set +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +# CONFIG_8139TOO_8129 is not set +# CONFIG_8139_NEW_RX_RESET is not set +# CONFIG_SIS900 is not set +# CONFIG_SIS900_OLD is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_VIA_RHINE_MMIO is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_NET_POCKET is not set + +# +# Ethernet (1000 Mbit) +# +CONFIG_ACENIC=m +# CONFIG_ACENIC_OMIT_TIGON_I is not set +# CONFIG_DL2K is not set +# CONFIG_MYRI_SBUS is not set +CONFIG_NS83820=m +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_SK98LIN is not set +CONFIG_NET_BROADCOM=m +CONFIG_TIGON3=m +CONFIG_NET_E1000=m +# CONFIG_FDDI is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_HIPPI is not set +# CONFIG_PLIP is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set +# CONFIG_NET_FC is not set +# CONFIG_RCPCI is not set +# CONFIG_SHAPER is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set + +# +# Quadrics Supercomputers +# + +# +# QsNet +# +CONFIG_QUADRICS=y +CONFIG_QSNETMOD=m +CONFIG_ELAN3MOD=m +CONFIG_EPMOD=m +CONFIG_EIPMOD=m +CONFIG_RMSMOD=m +CONFIG_JTAG=m + +# +# QsNet II +# + +# +# Amateur Radio support +# +# CONFIG_HAMRADIO is not set + +# +# IrDA (infrared) support +# +# CONFIG_IRDA is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set +CONFIG_KALLSYMS=y + +# +# 
Old CD-ROM drivers (not SCSI, not IDE) +# +# CONFIG_CD_NO_IDESCSI is not set + +# +# Input core support +# +# CONFIG_INPUT is not set +# CONFIG_INPUT_KEYBDEV is not set +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_EVDEV is not set + +# +# Character devices +# +CONFIG_ECC=m +CONFIG_CHAOSTEST=m +CONFIG_P4THERM=m +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_SERIAL=y +CONFIG_SERIAL_CONSOLE=y +CONFIG_SERIAL_EXTENDED=y +# CONFIG_SERIAL_MANY_PORTS is not set +CONFIG_SERIAL_SHARE_IRQ=y +# CONFIG_SERIAL_DETECT_IRQ is not set +# CONFIG_SERIAL_MULTIPORT is not set +# CONFIG_HUB6 is not set +# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_UNIX98_PTYS=y +CONFIG_UNIX98_PTY_COUNT=2048 +# CONFIG_PRINTER is not set +# CONFIG_PPDEV is not set + +# +# I2C support +# +CONFIG_I2C=y +# CONFIG_I2C_ALGOBIT is not set +# CONFIG_I2C_ALGOPCF is not set +CONFIG_I2C_MAINBOARD=y +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_HYDRA is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_TSUNAMI is not set +CONFIG_I2C_I801=m +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set +CONFIG_I2C_ISA=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_PROC=y + +# +# Hardware sensors support +# +CONFIG_SENSORS=y +CONFIG_SENSORS_ADM1021=m +# CONFIG_SENSORS_ADM1024 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_FSCPOS is not set +# CONFIG_SENSORS_FSCSCY is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_MAXILIFE is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_MTP008 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +CONFIG_SENSORS_LM87=m +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_THMC50 is 
not set +# CONFIG_SENSORS_VIA686A is not set +CONFIG_SENSORS_W83781D=y +# CONFIG_SENSORS_OTHER is not set + +# +# Mice +# +# CONFIG_BUSMOUSE is not set +CONFIG_MOUSE=y +CONFIG_PSMOUSE=y +# CONFIG_82C710_MOUSE is not set +# CONFIG_PC110_PAD is not set +# CONFIG_MK712_MOUSE is not set + +# +# Joysticks +# +# CONFIG_INPUT_GAMEPORT is not set + +# +# Input core support is needed for gameports +# + +# +# Input core support is needed for joysticks +# +# CONFIG_QIC02_TAPE is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_AMD_RNG is not set +# CONFIG_INTEL_RNG is not set +# CONFIG_AMD_PM768 is not set +# CONFIG_NVRAM is not set +CONFIG_RTC=y +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_SONYPI is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_FTAPE is not set +CONFIG_AGP=m +CONFIG_AGP_INTEL=y +# CONFIG_AGP_I810 is not set +# CONFIG_AGP_VIA is not set +# CONFIG_AGP_AMD is not set +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_ALI is not set +# CONFIG_AGP_SWORKS is not set +# CONFIG_DRM is not set +# CONFIG_MWAVE is not set +# CONFIG_BATTERY_GERICOM is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# Crypto Hardware support +# +# CONFIG_CRYPTO is not set + +# +# File systems +# +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +# CONFIG_ADFS_FS is not set +# CONFIG_AFS_FS is not set +# CONFIG_ADFS_FS_RW is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BEFS_DEBUG is not set +# CONFIG_BFS_FS is not set +CONFIG_EXT3_FS=y +CONFIG_JBD=y +CONFIG_JBD_DEBUG=y +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_UMSDOS_FS=m +CONFIG_VFAT_FS=m +# CONFIG_EFS_FS is not set +# CONFIG_JFFS_FS is not set +# CONFIG_JFFS2_FS is not set +CONFIG_CRAMFS=y +CONFIG_TMPFS=y +CONFIG_RAMFS=y 
+CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +# CONFIG_JFS_FS is not set +# CONFIG_JFS_DEBUG is not set +# CONFIG_JFS_STATISTICS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_NTFS_FS is not set +# CONFIG_NTFS_RW is not set +# CONFIG_HPFS_FS is not set +CONFIG_PROC_FS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVFS_MOUNT is not set +# CONFIG_DEVFS_DEBUG is not set +CONFIG_DEVPTS_FS=y +# CONFIG_QNX4FS_FS is not set +# CONFIG_QNX4FS_RW is not set +CONFIG_ROMFS_FS=m +CONFIG_EXT2_FS=y +# CONFIG_SYSV_FS is not set +# CONFIG_UDF_FS is not set +# CONFIG_UDF_RW is not set +# CONFIG_UFS_FS is not set +# CONFIG_UFS_FS_WRITE is not set + +# +# Network File Systems +# +# CONFIG_CODA_FS is not set +# CONFIG_INTERMEZZO_FS is not set +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +# CONFIG_ROOT_NFS is not set +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +CONFIG_NFSD_TCP=y +CONFIG_SUNRPC=m +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +# CONFIG_SMB_FS is not set +# CONFIG_NCP_FS is not set +# CONFIG_NCPFS_PACKET_SIGNING is not set +# CONFIG_NCPFS_IOCTL_LOCKING is not set +# CONFIG_NCPFS_STRONG is not set +# CONFIG_NCPFS_NFS_NS is not set +# CONFIG_NCPFS_OS2_NS is not set +# CONFIG_NCPFS_SMALLDOS is not set +# CONFIG_NCPFS_NLS is not set +# CONFIG_NCPFS_EXTRAS is not set +CONFIG_PFS_FS=m +CONFIG_PFS_DIRECTIO=m +CONFIG_ZISOFS_FS=y + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +CONFIG_OSF_PARTITION=y +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y +CONFIG_MSDOS_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +CONFIG_SUN_PARTITION=y +# CONFIG_SMB_NLS is not set +CONFIG_NLS=y + +# +# Native Language Support +# +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=m +# CONFIG_NLS_CODEPAGE_737 is not set +# 
CONFIG_NLS_CODEPAGE_775 is not set +CONFIG_NLS_CODEPAGE_850=m +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +CONFIG_NLS_ISO8859_1=m +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set + +# +# Console drivers +# +CONFIG_VGA_CONSOLE=y +CONFIG_VIDEO_SELECT=y +# CONFIG_VIDEO_IGNORE_BAD_MODE is not set +# CONFIG_MDA_CONSOLE is not set + +# +# Frame-buffer support +# +# CONFIG_FB is not set +# CONFIG_SPEAKUP is not set + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +# CONFIG_USB is not set + +# +# Bluetooth support +# +# CONFIG_BLUEZ is not set + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +CONFIG_FRAME_POINTER=y +CONFIG_STACK_TRACE_SCAN=y +CONFIG_STACK_TRACE_FPTR=y +CONFIG_STACK_TRACE_PARAM_COUNT=4 +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_IOVIRT is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_MCL_COREDUMP=y +CONFIG_BOOTIMG=y +# 
CONFIG_OPROFILE is not set +# CONFIG_NOBIGSTACK is not set +CONFIG_STACK_SIZE_16KB=y +# CONFIG_STACK_SIZE_32KB is not set +# CONFIG_STACK_SIZE_64KB is not set +CONFIG_STACK_SIZE_SHIFT=2 + +# +# Library routines +# +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i586-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i686-smp.config similarity index 100% rename from lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i586-smp.config rename to lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i686-smp.config diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i586.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i686.config similarity index 100% rename from lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i586.config rename to lustre/kernel_patches/kernel_configs/kernel-2.4.20-rh-2.4-i686.config diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-suse-2.4.21-2-x86_64.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-suse-2.4.21-2-x86_64.config new file mode 100644 index 0000000..eca7425 --- /dev/null +++ b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-suse-2.4.21-2-x86_64.config @@ -0,0 +1,2036 @@ +# +# Automatically generated by make menuconfig: don't edit +# +CONFIG_X86_64=y +CONFIG_X86=y +# CONFIG_ISA is not set +# CONFIG_SBUS is not set +CONFIG_UID16=y +CONFIG_X86_CMPXCHG=y +CONFIG_EARLY_PRINTK=y + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y + +# +# Processor type and features +# +CONFIG_MK8=y +# CONFIG_GENERIC_CPU is not set +CONFIG_X86_L1_CACHE_BYTES=64 +CONFIG_X86_L1_CACHE_SHIFT=6 +CONFIG_X86_TSC=y +CONFIG_X86_GOOD_APIC=y + +# +# CPU Frequency scaling +# +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_TABLE=y +CONFIG_CPU_FREQ_PROC_INTF=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_24_API=y 
+CONFIG_X86_POWERNOW_K8=m +# CONFIG_X86_POWERNOW_K8_DBG is not set +CONFIG_X86_MSR=m +CONFIG_X86_CPUID=m +# CONFIG_MATH_EMULATION is not set +# CONFIG_MCA is not set +# CONFIG_EISA is not set +CONFIG_X86_IO_APIC=y +CONFIG_X86_LOCAL_APIC=y +CONFIG_MTRR=y +# CONFIG_SMP is not set +CONFIG_HPET_TIMER=y +CONFIG_GART_IOMMU=y +CONFIG_X86_UP_IOAPIC=y +CONFIG_MCE=y +# CONFIG_K8_NUMA is not set + +# +# General setup +# +# CONFIG_DESKTOP is not set +CONFIG_NET=y +CONFIG_EVLOG=y +CONFIG_EVLOG_BUFSIZE=128 +CONFIG_EVLOG_FWPRINTK=y +# CONFIG_EVLOG_PRINTKWLOC is not set +CONFIG_PCI=y +CONFIG_PCI_DIRECT=y +# CONFIG_PCI_NAMES is not set +CONFIG_HOTPLUG=y + +# +# PCMCIA/CardBus support +# +CONFIG_PCMCIA=m +CONFIG_CARDBUS=y +CONFIG_TCIC=y +CONFIG_I82092=y +CONFIG_I82365=y + +# +# PCI Hotplug Support +# +CONFIG_HOTPLUG_PCI=m +# CONFIG_HOTPLUG_PCI_COMPAQ is not set +# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set +# CONFIG_HOTPLUG_PCI_IBM is not set +CONFIG_HOTPLUG_PCI_ACPI=m +CONFIG_HOTPLUG_PCI_AMD=m +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_SYSCTL=y +CONFIG_MAX_USER_RT_PRIO=100 +CONFIG_MAX_RT_PRIO=0 +CONFIG_KCORE_ELF=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_PM=y +CONFIG_IA32_EMULATION=y + +# +# ACPI Support +# +CONFIG_ACPI=y +# CONFIG_ACPI_HT_ONLY is not set +CONFIG_ACPI_INITRD=y +CONFIG_ACPI_BOOT=y +CONFIG_ACPI_BUS=y +CONFIG_ACPI_INTERPRETER=y +CONFIG_ACPI_EC=y +CONFIG_ACPI_POWER=y +CONFIG_ACPI_PCI=y +CONFIG_ACPI_SLEEP=y +CONFIG_ACPI_SYSTEM=y +CONFIG_ACPI_AC=m +CONFIG_ACPI_BATTERY=m +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +CONFIG_ACPI_THERMAL=m +CONFIG_ACPI_ASUS=m +CONFIG_ACPI_TOSHIBA=m +# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_RELAXED_AML=y + +# +# Memory Technology Devices (MTD) +# +CONFIG_MTD=m +# CONFIG_MTD_DEBUG is not set +CONFIG_MTD_PARTITIONS=m +CONFIG_MTD_CONCAT=m +CONFIG_MTD_REDBOOT_PARTS=m +CONFIG_MTD_CMDLINE_PARTS=m +CONFIG_MTD_CHAR=m +CONFIG_MTD_BLOCK=m +CONFIG_MTD_BLOCK_RO=m +CONFIG_FTL=m +CONFIG_NFTL=m +# CONFIG_NFTL_RW 
is not set + +# +# RAM/ROM/Flash chip drivers +# +CONFIG_MTD_CFI=m +CONFIG_MTD_JEDECPROBE=m +CONFIG_MTD_GEN_PROBE=m +CONFIG_MTD_CFI_ADV_OPTIONS=y +CONFIG_MTD_CFI_NOSWAP=y +# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set +# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set +# CONFIG_MTD_CFI_GEOMETRY is not set +CONFIG_MTD_CFI_INTELEXT=m +CONFIG_MTD_CFI_AMDSTD=m +CONFIG_MTD_CFI_STAA=m +# CONFIG_MTD_RAM is not set +# CONFIG_MTD_ROM is not set +CONFIG_MTD_ABSENT=m +CONFIG_MTD_OBSOLETE_CHIPS=y +CONFIG_MTD_AMDSTD=m +CONFIG_MTD_SHARP=m +CONFIG_MTD_JEDEC=m + +# +# Mapping drivers for chip access +# +CONFIG_MTD_PHYSMAP=m +CONFIG_MTD_PHYSMAP_START=8000000 +CONFIG_MTD_PHYSMAP_LEN=4000000 +CONFIG_MTD_PHYSMAP_BUSWIDTH=2 +CONFIG_MTD_PNC2000=m +CONFIG_MTD_SC520CDP=m +CONFIG_MTD_NETSC520=m +CONFIG_MTD_SBC_GXX=m +CONFIG_MTD_ELAN_104NC=m +CONFIG_MTD_DILNETPC=m +CONFIG_MTD_DILNETPC_BOOTSIZE=80000 +CONFIG_MTD_MIXMEM=m +CONFIG_MTD_OCTAGON=m +CONFIG_MTD_VMAX=m +CONFIG_MTD_SCx200_DOCFLASH=m +CONFIG_MTD_L440GX=m +# CONFIG_MTD_AMD76XROM is not set +CONFIG_MTD_ICH2ROM=m +CONFIG_MTD_NETtel=m +# CONFIG_MTD_SCB2_FLASH is not set +CONFIG_MTD_PCI=m +CONFIG_MTD_PCMCIA=m + +# +# Self-contained MTD device drivers +# +CONFIG_MTD_PMC551=m +CONFIG_MTD_PMC551_BUGFIX=y +# CONFIG_MTD_PMC551_DEBUG is not set +CONFIG_MTD_SLRAM=m +CONFIG_MTD_MTDRAM=m +CONFIG_MTDRAM_TOTAL_SIZE=4096 +CONFIG_MTDRAM_ERASE_SIZE=128 +CONFIG_MTD_BLKMTD=m +CONFIG_MTD_DOC1000=m +CONFIG_MTD_DOC2000=m +CONFIG_MTD_DOC2001=m +CONFIG_MTD_DOCPROBE=m +CONFIG_MTD_DOCPROBE_ADVANCED=y +CONFIG_MTD_DOCPROBE_ADDRESS=0000 +CONFIG_MTD_DOCPROBE_HIGH=y +CONFIG_MTD_DOCPROBE_55AA=y + +# +# NAND Flash Device Drivers +# +CONFIG_MTD_NAND=m +# CONFIG_MTD_NAND_VERIFY_WRITE is not set +CONFIG_MTD_NAND_IDS=m + +# +# Parallel port support +# +CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m +CONFIG_PARPORT_PC_CML1=m +CONFIG_PARPORT_SERIAL=m +CONFIG_PARPORT_PC_FIFO=y +CONFIG_PARPORT_PC_SUPERIO=y +CONFIG_PARPORT_PC_PCMCIA=m +# CONFIG_PARPORT_AMIGA is not set +# CONFIG_PARPORT_MFC3 is not 
set +# CONFIG_PARPORT_ATARI is not set +# CONFIG_PARPORT_GSC is not set +# CONFIG_PARPORT_SUNBPP is not set +CONFIG_PARPORT_OTHER=y +CONFIG_PARPORT_1284=y + +# +# Block devices +# +CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_XD is not set +CONFIG_PARIDE=m +CONFIG_PARIDE_PARPORT=m +CONFIG_PARIDE_PD=m +CONFIG_PARIDE_PCD=m +CONFIG_PARIDE_PF=m +CONFIG_PARIDE_PT=m +CONFIG_PARIDE_PG=m +CONFIG_PARIDE_ATEN=m +CONFIG_PARIDE_BPCK=m +CONFIG_PARIDE_BPCK6=m +CONFIG_PARIDE_COMM=m +CONFIG_PARIDE_DSTR=m +CONFIG_PARIDE_FIT2=m +CONFIG_PARIDE_FIT3=m +CONFIG_PARIDE_EPAT=m +CONFIG_PARIDE_EPATC8=y +CONFIG_PARIDE_EPIA=m +CONFIG_PARIDE_FRIQ=m +CONFIG_PARIDE_FRPW=m +CONFIG_PARIDE_KBIC=m +CONFIG_PARIDE_KTTI=m +CONFIG_PARIDE_ON20=m +CONFIG_PARIDE_ON26=m +CONFIG_BLK_CPQ_DA=m +CONFIG_BLK_CPQ_CISS_DA=m +CONFIG_CISS_SCSI_TAPE=y +CONFIG_BLK_DEV_DAC960=m +CONFIG_BLK_DEV_UMEM=m +CONFIG_BLK_DEV_LOOP=y +CONFIG_CIPHER_TWOFISH=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=128000 +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_STATS=y + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID5=m +CONFIG_MD_MULTIPATH=m +CONFIG_BLK_DEV_LVM=m +CONFIG_BLK_DEV_DM=m +CONFIG_BLK_DEV_DM_MIRROR=m + +# +# Cryptography support (CryptoAPI) +# +CONFIG_CRYPTO=y +CONFIG_CIPHERS=y +CONFIG_CIPHER_AES=m +CONFIG_CIPHER_3DES=m +CONFIG_CIPHER_NULL=m +CONFIG_CIPHER_DES=m +CONFIG_DIGESTS=y +CONFIG_DIGEST_MD5=m +CONFIG_DIGEST_SHA1=m +CONFIG_CRYPTODEV=y +CONFIG_CRYPTOLOOP=m +CONFIG_CRYPTOLOOP_ATOMIC=y +# CONFIG_CRYPTOLOOP_IV_HACK is not set +# CONFIG_CRYPTOLOOP_DEBUG is not set + +# +# Networking options +# +CONFIG_PACKET=m +CONFIG_PACKET_MMAP=y +CONFIG_NETLINK_DEV=m +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_FILTER=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IPSEC=m +CONFIG_IPSEC_DEBUG=y +CONFIG_IPSEC_DEBUG_DISABLE_DEFAULT=y +CONFIG_IPSEC_TUNNEL=y +CONFIG_TUX=m +CONFIG_TUX_EXTCGI=y 
+CONFIG_TUX_EXTENDED_LOG=y +# CONFIG_TUX_DEBUG is not set +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_FWMARK=y +CONFIG_IP_ROUTE_NAT=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_TOS=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_ROUTE_LARGE_TABLES=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +CONFIG_INET_ECN=y +CONFIG_SYN_COOKIES=y +CONFIG_IP_IPSEC=m + +# +# IP: Netfilter Configuration +# +CONFIG_IP_NF_CONNTRACK=m +CONFIG_IP_NF_FTP=m +CONFIG_IP_NF_AMANDA=m +CONFIG_IP_NF_TFTP=m +CONFIG_IP_NF_IRC=m +CONFIG_IP_NF_QUEUE=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_LIMIT=m +CONFIG_IP_NF_MATCH_MAC=m +CONFIG_IP_NF_MATCH_PKTTYPE=m +CONFIG_IP_NF_MATCH_MARK=m +CONFIG_IP_NF_MATCH_MULTIPORT=m +CONFIG_IP_NF_MATCH_TOS=m +CONFIG_IP_NF_MATCH_PSD=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_DSCP=m +CONFIG_IP_NF_MATCH_AH_ESP=m +CONFIG_IP_NF_MATCH_LENGTH=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_MATCH_TCPMSS=m +CONFIG_IP_NF_MATCH_HELPER=m +CONFIG_IP_NF_MATCH_STATE=m +CONFIG_IP_NF_MATCH_CONNTRACK=m +CONFIG_IP_NF_MATCH_IPLIMIT=m +CONFIG_IP_NF_MATCH_UNCLEAN=m +CONFIG_IP_NF_MATCH_STRING=m +CONFIG_IP_NF_MATCH_OWNER=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_MIRROR=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_NAT_AMANDA=m +# CONFIG_IP_NF_NAT_LOCAL is not set +CONFIG_IP_NF_NAT_SNMP_BASIC=m +CONFIG_IP_NF_NAT_IRC=m +CONFIG_IP_NF_NAT_FTP=m +CONFIG_IP_NF_NAT_TFTP=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_TOS=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_DSCP=m +CONFIG_IP_NF_TARGET_MARK=m +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m 
+CONFIG_IP_NF_COMPAT_IPCHAINS=m +CONFIG_IP_NF_NAT_NEEDED=y +CONFIG_IP_NF_COMPAT_IPFWADM=m +CONFIG_IP_NF_NAT_NEEDED=y + +# +# IP: Virtual Server Configuration +# +CONFIG_IP_VS=m +# CONFIG_IP_VS_DEBUG is not set +CONFIG_IP_VS_TAB_BITS=12 +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m +CONFIG_IP_VS_FTP=m +CONFIG_IPV6=m +# CONFIG_IPV6_DEBUG is not set +CONFIG_IPV6_IM=y +CONFIG_IPV6_MODULE_IP_GRE=y +CONFIG_IPV6_ISATAP=y +CONFIG_IPV6_PREFIXLIST=y +CONFIG_IPV6_6TO4_NEXTHOP=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_SUBTREES=y +# CONFIG_IPV6_MLD6_ALL_DONE is not set +# CONFIG_IPV6_NODEINFO is not set +# CONFIG_IPV6_ZONE is not set +CONFIG_IPV6_DROP_FAKE_V4MAPPED=y + +# +# IPv6: Netfilter Configuration +# +CONFIG_IP6_NF_QUEUE=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_LIMIT=m +CONFIG_IP6_NF_MATCH_MAC=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_MULTIPORT=m +CONFIG_IP6_NF_MATCH_OWNER=m +CONFIG_IP6_NF_MATCH_MARK=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_AHESP=m +CONFIG_IP6_NF_MATCH_LENGTH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_TARGET_MARK=m +CONFIG_IPV6_IPSEC=y +CONFIG_IPV6_IPSEC_TUNNEL=y +CONFIG_IPV6_IPV6_TUNNEL=m +CONFIG_IPV6_MOBILITY=m +CONFIG_IPV6_MOBILITY_CN=m +CONFIG_IPV6_MOBILITY_MN=m +CONFIG_IPV6_MOBILITY_HA=m +# CONFIG_IPV6_MOBILITY_DEBUG is not set +# CONFIG_SHARED_IPV6_CARDS is not set +CONFIG_KHTTPD=m +CONFIG_KHTTPD_IPV6=y +# CONFIG_ATM is not set +CONFIG_VLAN_8021Q=m +CONFIG_IPX=m +# CONFIG_IPX_INTERN is not set +CONFIG_ATALK=m + +# +# Appletalk devices +# +CONFIG_DEV_APPLETALK=y +CONFIG_COPS_DAYNA=y +CONFIG_COPS_TANGENT=y +CONFIG_IPDDP=m +CONFIG_IPDDP_ENCAP=y +CONFIG_IPDDP_DECAP=y +CONFIG_DECNET=m 
+CONFIG_DECNET_SIOCGIFCONF=y +# CONFIG_DECNET_ROUTER is not set +CONFIG_BRIDGE=m +CONFIG_X25=m +CONFIG_LAPB=m +# CONFIG_LLC is not set +# CONFIG_NET_DIVERT is not set +CONFIG_ECONET=m +# CONFIG_ECONET_AUNUDP is not set +# CONFIG_ECONET_NATIVE is not set +CONFIG_WAN_ROUTER=m +# CONFIG_NET_FASTROUTE is not set +# CONFIG_NET_HW_FLOWCONTROL is not set + +# +# QoS and/or fair queueing +# +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_CSZ=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_QOS=y +CONFIG_NET_ESTIMATOR=y +CONFIG_NET_CLS=y +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_ROUTE=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_POLICE=y + +# +# Network testing +# +CONFIG_NET_PKTGEN=m + +# +# Telephony Support +# +CONFIG_PHONE=m +CONFIG_PHONE_IXJ=m +CONFIG_PHONE_IXJ_PCMCIA=m + +# +# ATA/IDE/MFM/RLL support +# +CONFIG_IDE=y + +# +# IDE, ATA and ATAPI Block devices +# +CONFIG_BLK_DEV_IDE=y +# CONFIG_BLK_DEV_HD_IDE is not set +# CONFIG_BLK_DEV_HD is not set +CONFIG_BLK_DEV_IDEDISK=y +CONFIG_IDEDISK_MULTI_MODE=y +CONFIG_IDEDISK_STROKE=y +CONFIG_BLK_DEV_IDECS=m +CONFIG_BLK_DEV_IDECD=m +CONFIG_BLK_DEV_IDETAPE=m +CONFIG_BLK_DEV_IDEFLOPPY=y +CONFIG_BLK_DEV_IDESCSI=m +# CONFIG_IDE_TASK_IOCTL is not set +CONFIG_BLK_DEV_CMD640=y +CONFIG_BLK_DEV_CMD640_ENHANCED=y +# CONFIG_BLK_DEV_ISAPNP is not set +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_BLK_DEV_GENERIC=y +CONFIG_IDEPCI_SHARE_IRQ=y +CONFIG_BLK_DEV_IDEDMA_PCI=y +CONFIG_BLK_DEV_OFFBOARD=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +CONFIG_IDEDMA_ONLYDISK=y +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_PCI_WIP is not set +# CONFIG_BLK_DEV_ADMA100 is not set +CONFIG_BLK_DEV_AEC62XX=y +CONFIG_BLK_DEV_ALI15X3=y +# CONFIG_WDC_ALI15X3 is not set 
+CONFIG_BLK_DEV_AMD74XX=y +# CONFIG_AMD74XX_OVERRIDE is not set +CONFIG_BLK_DEV_CMD64X=y +CONFIG_BLK_DEV_TRIFLEX=y +CONFIG_BLK_DEV_CY82C693=y +CONFIG_BLK_DEV_CS5530=y +CONFIG_BLK_DEV_HPT34X=y +# CONFIG_HPT34X_AUTODMA is not set +CONFIG_BLK_DEV_HPT366=y +# CONFIG_BLK_DEV_PIIX is not set +CONFIG_BLK_DEV_NS87415=y +# CONFIG_BLK_DEV_OPTI621 is not set +CONFIG_BLK_DEV_PDC202XX_OLD=y +CONFIG_PDC202XX_BURST=y +CONFIG_BLK_DEV_PDC202XX_NEW=y +CONFIG_PDC202XX_FORCE=y +# CONFIG_BLK_DEV_RZ1000 is not set +CONFIG_BLK_DEV_SC1200=y +# CONFIG_BLK_DEV_SVWKS is not set +CONFIG_BLK_DEV_SIIMAGE=y +CONFIG_BLK_DEV_SIS5513=y +CONFIG_BLK_DEV_SLC90E66=y +CONFIG_BLK_DEV_TRM290=y +CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_BLK_DEV_CENATEK=y +# CONFIG_IDE_CHIPSETS is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_IDEDMA_IVB is not set +# CONFIG_DMA_NONPCI is not set +CONFIG_BLK_DEV_PDC202XX=y +CONFIG_BLK_DEV_IDE_MODES=y +CONFIG_BLK_DEV_ATARAID=m +CONFIG_BLK_DEV_ATARAID_PDC=m +CONFIG_BLK_DEV_ATARAID_HPT=m +CONFIG_BLK_DEV_ATARAID_SII=m + +# +# SCSI support +# +CONFIG_SCSI=m +CONFIG_BLK_DEV_SD=m +CONFIG_SD_MAX_MAJORS=144 +CONFIG_CHR_DEV_ST=m +CONFIG_CHR_DEV_OSST=m +CONFIG_BLK_DEV_SR=m +# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_SR_EXTRA_DEVS=4 +CONFIG_CHR_DEV_SCH=m +CONFIG_CHR_DEV_SG=m +# CONFIG_SCSI_DEBUG_QUEUES is not set +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y + +# +# SCSI low-level drivers +# +CONFIG_BLK_DEV_3W_XXXX_RAID=m +# CONFIG_SCSI_7000FASST is not set +CONFIG_SCSI_ACARD=m +# CONFIG_SCSI_AHA152X is not set +# CONFIG_SCSI_AHA1542 is not set +# CONFIG_SCSI_AHA1740 is not set +CONFIG_SCSI_AACRAID=m +CONFIG_SCSI_AIC7XXX=m +CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 +CONFIG_AIC7XXX_RESET_DELAY_MS=5000 +CONFIG_AIC7XXX_PROBE_EISA_VL=y +# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set +# CONFIG_AIC7XXX_DEBUG_ENABLE is not set +CONFIG_AIC7XXX_DEBUG_MASK=0 +CONFIG_AIC7XXX_REG_PRETTY_PRINT=y +CONFIG_SCSI_AIC79XX=m +CONFIG_AIC79XX_CMDS_PER_DEVICE=32 +CONFIG_AIC79XX_RESET_DELAY_MS=15000 +# 
CONFIG_AIC79XX_BUILD_FIRMWARE is not set +# CONFIG_AIC79XX_ENABLE_RD_STRM is not set +# CONFIG_AIC79XX_DEBUG_ENABLE is not set +CONFIG_AIC79XX_DEBUG_MASK=0 +CONFIG_AIC79XX_REG_PRETTY_PRINT=y +CONFIG_SCSI_AIC7XXX_OLD=m +CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y +CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=24 +CONFIG_AIC7XXX_OLD_PROC_STATS=y +CONFIG_SCSI_DPT_I2O=m +CONFIG_SCSI_ADVANSYS=m +CONFIG_SCSI_IN2000=m +CONFIG_SCSI_AM53C974=m +CONFIG_SCSI_MEGARAID=m +CONFIG_SCSI_MEGARAID2=m +CONFIG_SCSI_BUSLOGIC=m +# CONFIG_SCSI_OMIT_FLASHPOINT is not set +CONFIG_SCSI_CPQFCTS=m +CONFIG_SCSI_DMX3191D=m +# CONFIG_SCSI_DTC3280 is not set +CONFIG_SCSI_EATA=m +CONFIG_SCSI_EATA_TAGGED_QUEUE=y +CONFIG_SCSI_EATA_LINKED_COMMANDS=y +CONFIG_SCSI_EATA_MAX_TAGS=16 +CONFIG_SCSI_EATA_DMA=m +CONFIG_SCSI_EATA_PIO=m +CONFIG_SCSI_LPFC=m +CONFIG_SCSI_FUTURE_DOMAIN=m +CONFIG_SCSI_GDTH=m +CONFIG_SCSI_GENERIC_NCR5380=m +CONFIG_SCSI_GENERIC_NCR53C400=y +CONFIG_SCSI_G_NCR5380_PORT=y +# CONFIG_SCSI_G_NCR5380_MEM is not set +CONFIG_SCSI_IPS_OLD=m +CONFIG_SCSI_IPS=m +CONFIG_SCSI_INITIO=m +CONFIG_SCSI_INIA100=m +CONFIG_SCSI_PPA=m +CONFIG_SCSI_IMM=m +# CONFIG_SCSI_IZIP_EPP16 is not set +# CONFIG_SCSI_IZIP_SLOW_CTR is not set +CONFIG_PPSCSI=m +CONFIG_PPSCSI_T348=m +CONFIG_PPSCSI_T358=m +CONFIG_PPSCSI_VPI0=m +CONFIG_PPSCSI_VPI2=m +CONFIG_PPSCSI_ONSCSI=m +CONFIG_PPSCSI_SPARCSI=m +CONFIG_PPSCSI_EPSA2=m +CONFIG_PPSCSI_EPST=m +# CONFIG_SCSI_NCR53C406A is not set +CONFIG_SCSI_NCR53C7xx=m +# CONFIG_SCSI_NCR53C7xx_sync is not set +CONFIG_SCSI_NCR53C7xx_FAST=y +CONFIG_SCSI_NCR53C7xx_DISCONNECT=y +CONFIG_SCSI_SYM53C8XX_2=m +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 +# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +CONFIG_SCSI_NCR53C8XX=m +CONFIG_SCSI_SYM53C8XX=m +CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 +CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 +CONFIG_SCSI_NCR53C8XX_SYNC=80 +CONFIG_SCSI_NCR53C8XX_PROFILE=y +# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set 
+CONFIG_SCSI_NCR53C8XX_PQS_PDS=y +# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set +# CONFIG_SCSI_PAS16 is not set +CONFIG_SCSI_PCI2000=m +CONFIG_SCSI_PCI2220I=m +# CONFIG_SCSI_PSI240I is not set +# CONFIG_SCSI_QLOGIC_FAS is not set +CONFIG_SCSI_QLOGIC_ISP=m +CONFIG_SCSI_QLOGIC_FC=m +CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y +CONFIG_SCSI_QLOGIC_1280=m +CONFIG_SCSI_QLOGIC_QLA2XXX=y +CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2100=m +CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2200=m +CONFIG_SCSI_QLOGIC_QLA2XXX_QLA2300=m +CONFIG_SCSI_SEAGATE=m +CONFIG_SCSI_SIM710=m +CONFIG_SCSI_SYM53C416=m +CONFIG_SCSI_DC395x_TRMS1040=m +CONFIG_SCSI_DC390T=m +# CONFIG_SCSI_DC390T_NOGENSUPP is not set +# CONFIG_SCSI_T128 is not set +CONFIG_SCSI_U14_34F=m +CONFIG_SCSI_U14_34F_LINKED_COMMANDS=y +CONFIG_SCSI_U14_34F_MAX_TAGS=8 +CONFIG_SCSI_ULTRASTOR=m +CONFIG_SCSI_NSP32=m +CONFIG_SCSI_DEBUG=m + +# +# PCMCIA SCSI adapter support +# +CONFIG_SCSI_PCMCIA=y +CONFIG_PCMCIA_AHA152X=m +CONFIG_PCMCIA_FDOMAIN=m +CONFIG_PCMCIA_NINJA_SCSI=m +CONFIG_PCMCIA_QLOGIC=m + +# +# Fusion MPT device support +# +CONFIG_FUSION=m +# CONFIG_FUSION_BOOT is not set +CONFIG_FUSION_MAX_SGE=40 +CONFIG_FUSION_ISENSE=m +CONFIG_FUSION_CTL=m +CONFIG_FUSION_LAN=m +CONFIG_NET_FC=y + +# +# IEEE 1394 (FireWire) support (EXPERIMENTAL) +# +CONFIG_IEEE1394=m +CONFIG_IEEE1394_PCILYNX=m +CONFIG_IEEE1394_OHCI1394=m +CONFIG_IEEE1394_VIDEO1394=m +CONFIG_IEEE1394_SBP2=m +# CONFIG_IEEE1394_SBP2_PHYS_DMA is not set +CONFIG_IEEE1394_ETH1394=m +CONFIG_IEEE1394_DV1394=m +CONFIG_IEEE1394_RAWIO=m +CONFIG_IEEE1394_CMP=m +CONFIG_IEEE1394_AMDTP=m +# CONFIG_IEEE1394_VERBOSEDEBUG is not set +# CONFIG_IEEE1394_OUI_DB is not set + +# +# Network device support +# +CONFIG_NETDEVICES=y + +# +# ARCnet devices +# +CONFIG_ARCNET=m +CONFIG_ARCNET_1201=m +CONFIG_ARCNET_1051=m +CONFIG_ARCNET_RAW=m +CONFIG_ARCNET_COM90xx=m +CONFIG_ARCNET_COM90xxIO=m +CONFIG_ARCNET_RIM_I=m +CONFIG_ARCNET_COM20020=m +# CONFIG_ARCNET_COM20020_ISA is not set +CONFIG_ARCNET_COM20020_PCI=m +CONFIG_DUMMY=m 
+CONFIG_BONDING=m +CONFIG_EQUALIZER=m +CONFIG_TUN=m +CONFIG_ETHERTAP=m + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +# CONFIG_SUNLANCE is not set +CONFIG_HAPPYMEAL=m +# CONFIG_SUNBMAC is not set +# CONFIG_SUNQE is not set +CONFIG_SUNGEM=m +CONFIG_NET_VENDOR_3COM=y +# CONFIG_EL1 is not set +# CONFIG_EL2 is not set +# CONFIG_ELPLUS is not set +# CONFIG_EL16 is not set +# CONFIG_ELMC is not set +# CONFIG_ELMC_II is not set +CONFIG_VORTEX=m +CONFIG_TYPHOON=m +# CONFIG_LANCE is not set +CONFIG_NET_VENDOR_SMC=y +# CONFIG_WD80x3 is not set +# CONFIG_ULTRAMCA is not set +# CONFIG_ULTRA is not set +# CONFIG_ULTRA32 is not set +# CONFIG_SMC9194 is not set +CONFIG_NET_VENDOR_RACAL=y +# CONFIG_NI5010 is not set +# CONFIG_NI52 is not set +# CONFIG_NI65 is not set +CONFIG_HP100=m +# CONFIG_NET_ISA is not set +CONFIG_NET_PCI=y +CONFIG_PCNET32=m +CONFIG_PCNET32_OLD=m +CONFIG_AMD8111_ETH=m +CONFIG_ADAPTEC_STARFIRE=m +# CONFIG_APRICOT is not set +CONFIG_NET_BCM4400=m +# CONFIG_CS89x0 is not set +CONFIG_TULIP=m +# CONFIG_TULIP_MWI is not set +# CONFIG_TULIP_MMIO is not set +CONFIG_DE4X5=m +CONFIG_DGRS=m +CONFIG_DM9102=m +CONFIG_EEPRO100=m +# CONFIG_EEPRO100_PIO is not set +CONFIG_E100=m +# CONFIG_LNE390 is not set +CONFIG_FEALNX=m +CONFIG_NATSEMI=m +CONFIG_NETGEAR_GA621=m +CONFIG_NETGEAR_GA622=m +CONFIG_NE2K_PCI=m +# CONFIG_NE3210 is not set +# CONFIG_ES3210 is not set +CONFIG_8139CP=m +CONFIG_8139TOO=m +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +CONFIG_8139TOO_8129=y +# CONFIG_8139_OLD_RX_RESET is not set +CONFIG_SIS900=m +CONFIG_EPIC100=m +CONFIG_SUNDANCE=m +# CONFIG_SUNDANCE_MMIO is not set +CONFIG_TLAN=m +CONFIG_TC35815=m +CONFIG_VIA_RHINE=m +# CONFIG_VIA_RHINE_MMIO is not set +CONFIG_WINBOND_840=m +CONFIG_NET_POCKET=y +# CONFIG_DE600 is not set +# CONFIG_DE620 is not set + +# +# Ethernet (1000 Mbit) +# +CONFIG_ACENIC=m +# CONFIG_ACENIC_OMIT_TIGON_I is not set +CONFIG_DL2K=m +CONFIG_E1000=m +# CONFIG_MYRI_SBUS is not set 
+CONFIG_NS83820=m +CONFIG_HAMACHI=m +CONFIG_YELLOWFIN=m +CONFIG_R8169=m +CONFIG_SK98LIN=m +CONFIG_SK98LIN_T1=y +CONFIG_SK98LIN_T3=y +CONFIG_SK98LIN_T8=y +CONFIG_SK98LIN_T6=y +CONFIG_SK98LIN_T9=y +CONFIG_SK98LIN_T4=y +CONFIG_SK98LIN_T7=y +CONFIG_SK98LIN_T2=y +CONFIG_SK98LIN_T5=y +CONFIG_SK9DLIN=m +CONFIG_TIGON3=m +CONFIG_NET_BROADCOM=m +CONFIG_FDDI=y +CONFIG_DEFXX=m +CONFIG_SKFP=m +CONFIG_NETCONSOLE=m +CONFIG_HIPPI=y +CONFIG_ROADRUNNER=m +CONFIG_ROADRUNNER_LARGE_RINGS=y +CONFIG_PLIP=m +CONFIG_PPP=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPP_FILTER=y +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPPOE=m +CONFIG_SLIP=m +CONFIG_SLIP_COMPRESSED=y +CONFIG_SLIP_SMART=y +CONFIG_SLIP_MODE_SLIP6=y + +# +# Wireless LAN (non-hamradio) +# +CONFIG_NET_RADIO=y +CONFIG_STRIP=m +CONFIG_WAVELAN=m +CONFIG_AIRONET4500=m +CONFIG_AIRONET4500_NONCS=m +CONFIG_AIRONET4500_PNP=y +CONFIG_AIRONET4500_PCI=y +CONFIG_AIRONET4500_ISA=y +CONFIG_AIRONET4500_I365=y +CONFIG_AIRONET4500_PROC=m +CONFIG_AIRO=m +CONFIG_HERMES=m +CONFIG_PLX_HERMES=m +CONFIG_PCI_HERMES=m +CONFIG_PCMCIA_HERMES=m +CONFIG_AIRO_CS=m +CONFIG_NET_WIRELESS=y + +# +# Token Ring devices +# +# CONFIG_TR is not set +CONFIG_NET_FC=y +CONFIG_IPHASE5526=m +CONFIG_RCPCI=m +CONFIG_SHAPER=m + +# +# Wan interfaces +# +# CONFIG_WAN is not set + +# +# PCMCIA network device support +# +CONFIG_NET_PCMCIA=y +CONFIG_PCMCIA_3C589=m +CONFIG_PCMCIA_3C574=m +CONFIG_PCMCIA_FMVJ18X=m +CONFIG_PCMCIA_PCNET=m +CONFIG_PCMCIA_AXNET=m +CONFIG_PCMCIA_NMCLAN=m +CONFIG_PCMCIA_SMC91C92=m +CONFIG_PCMCIA_XIRC2PS=m +CONFIG_ARCNET_COM20020_CS=m +# CONFIG_PCMCIA_IBMTR is not set +CONFIG_PCMCIA_XIRCOM=m +CONFIG_PCMCIA_XIRTULIP=m +CONFIG_NET_PCMCIA_RADIO=y +CONFIG_PCMCIA_RAYCS=m +CONFIG_PCMCIA_NETWAVE=m +CONFIG_PCMCIA_WAVELAN=m +CONFIG_AIRONET4500_CS=m + +# +# Amateur Radio support +# +CONFIG_HAMRADIO=y +CONFIG_AX25=m +CONFIG_AX25_DAMA_SLAVE=y +CONFIG_NETROM=m +CONFIG_ROSE=m + +# +# AX.25 network device drivers +# 
+CONFIG_MKISS=m +CONFIG_6PACK=m +CONFIG_BPQETHER=m +CONFIG_SCC_DELAY=y +CONFIG_SCC_TRXECHO=y +CONFIG_BAYCOM_SER_FDX=m +CONFIG_BAYCOM_SER_HDX=m +CONFIG_BAYCOM_PAR=m +CONFIG_BAYCOM_EPP=m +CONFIG_SOUNDMODEM=m +CONFIG_SOUNDMODEM_SBC=y +CONFIG_SOUNDMODEM_WSS=y +CONFIG_SOUNDMODEM_AFSK1200=y +CONFIG_SOUNDMODEM_AFSK2400_7=y +CONFIG_SOUNDMODEM_AFSK2400_8=y +CONFIG_SOUNDMODEM_AFSK2666=y +CONFIG_SOUNDMODEM_HAPN4800=y +CONFIG_SOUNDMODEM_PSK4800=y +CONFIG_SOUNDMODEM_FSK9600=y +CONFIG_YAM=m + +# +# IrDA (infrared) support +# +CONFIG_IRDA=m +CONFIG_IRLAN=m +CONFIG_IRNET=m +CONFIG_IRCOMM=m +CONFIG_IRDA_ULTRA=y +CONFIG_IRDA_CACHE_LAST_LSAP=y +# CONFIG_IRDA_FAST_RR is not set +# CONFIG_IRDA_DEBUG is not set + +# +# Infrared-port device drivers +# +CONFIG_IRTTY_SIR=m +CONFIG_IRPORT_SIR=m +CONFIG_DONGLE=y +CONFIG_ESI_DONGLE=m +CONFIG_ACTISYS_DONGLE=m +CONFIG_TEKRAM_DONGLE=m +CONFIG_GIRBIL_DONGLE=m +CONFIG_LITELINK_DONGLE=m +CONFIG_MCP2120_DONGLE=m +CONFIG_OLD_BELKIN_DONGLE=m +CONFIG_ACT200L_DONGLE=m +CONFIG_MA600_DONGLE=m +CONFIG_USB_IRDA=m +CONFIG_NSC_FIR=m +CONFIG_WINBOND_FIR=m +# CONFIG_TOSHIBA_OLD is not set +CONFIG_TOSHIBA_FIR=m +CONFIG_SMC_IRCC_FIR=m +CONFIG_ALI_FIR=m +CONFIG_VLSI_FIR=m + +# +# ISDN subsystem +# +CONFIG_ISDN=m +CONFIG_ISDN_BOOL=y +CONFIG_ISDN_PPP=y +CONFIG_IPPP_FILTER=y +CONFIG_ISDN_PPP_VJ=y +CONFIG_ISDN_MPP=y +CONFIG_ISDN_PPP_BSDCOMP=m +CONFIG_ISDN_AUDIO=y +CONFIG_ISDN_TTY_FAX=y +CONFIG_ISDN_X25=y + +# +# ISDN feature submodules +# +CONFIG_ISDN_DRV_LOOP=m +CONFIG_ISDN_DIVERSION=m + +# +# Passive ISDN cards +# +CONFIG_ISDN_DRV_HISAX=m +CONFIG_ISDN_HISAX=y +CONFIG_HISAX_EURO=y +CONFIG_DE_AOC=y +# CONFIG_HISAX_NO_SENDCOMPLETE is not set +# CONFIG_HISAX_NO_LLC is not set +# CONFIG_HISAX_NO_KEYPAD is not set +CONFIG_HISAX_1TR6=y +CONFIG_HISAX_NI1=y +CONFIG_HISAX_MAX_CARDS=8 +CONFIG_HISAX_TELESPCI=y +CONFIG_HISAX_S0BOX=y +CONFIG_HISAX_FRITZPCI=y +CONFIG_HISAX_AVM_A1_PCMCIA=y +CONFIG_HISAX_ELSA=y +CONFIG_HISAX_DIEHLDIVA=y +CONFIG_HISAX_SEDLBAUER=y 
+CONFIG_HISAX_NETJET=y +CONFIG_HISAX_NETJET_U=y +CONFIG_HISAX_NICCY=y +CONFIG_HISAX_BKM_A4T=y +CONFIG_HISAX_SCT_QUADRO=y +CONFIG_HISAX_GAZEL=y +CONFIG_HISAX_HFC_PCI=y +CONFIG_HISAX_W6692=y +CONFIG_HISAX_HFC_SX=y +CONFIG_HISAX_ENTERNOW_PCI=y +# CONFIG_HISAX_DEBUG is not set +# CONFIG_HISAX_TELES_CS is not set +CONFIG_HISAX_SEDLBAUER_CS=m +CONFIG_HISAX_ELSA_CS=m +# CONFIG_HISAX_AVM_A1_CS is not set +CONFIG_HISAX_ST5481=m +CONFIG_HISAX_FRITZ_PCIPNP=m +CONFIG_USB_AUERISDN=m + +# +# Active ISDN cards +# +CONFIG_ISDN_DRV_ICN=m +CONFIG_ISDN_DRV_PCBIT=m +CONFIG_ISDN_DRV_SC=m +CONFIG_ISDN_DRV_ACT2000=m +CONFIG_ISDN_DRV_EICON=y +CONFIG_ISDN_DRV_EICON_DIVAS=m +CONFIG_ISDN_DRV_EICON_OLD=m +CONFIG_ISDN_DRV_EICON_PCI=y +CONFIG_ISDN_DRV_EICON_ISA=y +CONFIG_ISDN_DRV_TPAM=m +CONFIG_ISDN_CAPI=m +CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y +CONFIG_ISDN_CAPI_MIDDLEWARE=y +CONFIG_ISDN_CAPI_CAPI20=m +CONFIG_ISDN_CAPI_CAPIFS_BOOL=y +CONFIG_ISDN_CAPI_CAPIFS=m +CONFIG_ISDN_CAPI_CAPIDRV=m +# CONFIG_ISDN_DRV_AVMB1_B1ISA is not set +CONFIG_ISDN_DRV_AVMB1_B1PCI=m +CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y +# CONFIG_ISDN_DRV_AVMB1_T1ISA is not set +CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m +CONFIG_ISDN_DRV_AVMB1_AVM_CS=m +CONFIG_ISDN_DRV_AVMB1_T1PCI=m +CONFIG_ISDN_DRV_AVMB1_C4=m +CONFIG_HYSDN=m +CONFIG_HYSDN_CAPI=y + +# +# Input core support +# +CONFIG_INPUT=m +CONFIG_INPUT_KEYBDEV=m +CONFIG_INPUT_MOUSEDEV=m +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +CONFIG_INPUT_JOYDEV=m +CONFIG_INPUT_EVDEV=m + +# +# Character devices +# +CONFIG_VT=y +CONFIG_ECC=m +CONFIG_VT_CONSOLE=y +CONFIG_SERIAL=y +CONFIG_SERIAL_CONSOLE=y +CONFIG_SERIAL_EXTENDED=y +CONFIG_SERIAL_MANY_PORTS=y +CONFIG_SERIAL_SHARE_IRQ=y +# CONFIG_SERIAL_DETECT_IRQ is not set +CONFIG_SERIAL_MULTIPORT=y +CONFIG_HUB6=y +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_COMPUTONE=m +CONFIG_ROCKETPORT=m +CONFIG_CYCLADES=m +# CONFIG_CYZ_INTR is not set +# CONFIG_ESPSERIAL is not set +CONFIG_MOXA_INTELLIO=m +CONFIG_MOXA_SMARTIO=m +CONFIG_ISI=m 
+CONFIG_SYNCLINK=m +CONFIG_SYNCLINKMP=m +CONFIG_N_HDLC=m +CONFIG_RISCOM8=m +CONFIG_STALDRV=y +CONFIG_STALLION=m +CONFIG_ISTALLION=m +CONFIG_UNIX98_PTYS=y +CONFIG_UNIX98_PTY_COUNT=256 +CONFIG_PRINTER=m +# CONFIG_LP_CONSOLE is not set +CONFIG_PPDEV=m +CONFIG_TIPAR=m + +# +# I2C support +# +CONFIG_I2C=m +CONFIG_I2C_ALGOBIT=m +CONFIG_I2C_PHILIPSPAR=m +# CONFIG_I2C_ELV is not set +# CONFIG_I2C_VELLEMAN is not set +CONFIG_SCx200_I2C=m +CONFIG_SCx200_I2C_SCL=12 +CONFIG_SCx200_I2C_SDA=13 +CONFIG_SCx200_ACB=m +CONFIG_I2C_ALGOPCF=m +# CONFIG_I2C_ELEKTOR is not set +CONFIG_I2C_MAINBOARD=y +CONFIG_I2C_ALI1535=m +CONFIG_I2C_ALI15X3=m +CONFIG_I2C_HYDRA=m +CONFIG_I2C_AMD756=m +CONFIG_I2C_AMD8111=m +CONFIG_I2C_I801=m +# CONFIG_I2C_I810 is not set +CONFIG_I2C_PIIX4=m +CONFIG_I2C_SIS5595=m +CONFIG_I2C_SIS630=m +CONFIG_I2C_SIS645=m +CONFIG_I2C_SAVAGE4=m +CONFIG_I2C_VIA=m +CONFIG_I2C_VIAPRO=m +CONFIG_I2C_VOODOO3=m +CONFIG_I2C_ISA=m +CONFIG_I2C_CHARDEV=m +CONFIG_I2C_PROC=m + +# +# Hardware sensors support +# +CONFIG_SENSORS=y +CONFIG_SENSORS_ADM1021=m +CONFIG_SENSORS_ADM1024=m +CONFIG_SENSORS_ADM1025=m +CONFIG_SENSORS_ADM1026=m +CONFIG_SENSORS_ADM9240=m +CONFIG_SENSORS_DS1621=m +CONFIG_SENSORS_FSCPOS=m +CONFIG_SENSORS_FSCSCY=m +CONFIG_SENSORS_GL518SM=m +CONFIG_SENSORS_GL520SM=m +CONFIG_SENSORS_MAXILIFE=m +CONFIG_SENSORS_IT87=m +CONFIG_SENSORS_MTP008=m +CONFIG_SENSORS_LM75=m +CONFIG_SENSORS_LM78=m +CONFIG_SENSORS_LM80=m +CONFIG_SENSORS_LM85=m +CONFIG_SENSORS_LM87=m +# CONFIG_SENSORS_LM92 is not set +CONFIG_SENSORS_SIS5595=m +# CONFIG_SENSORS_SMSC47M1 is not set +CONFIG_SENSORS_THMC50=m +CONFIG_SENSORS_VIA686A=m +CONFIG_SENSORS_VT1211=m +CONFIG_SENSORS_VT8231=m +CONFIG_SENSORS_W83781D=m +CONFIG_SENSORS_OTHER=y +CONFIG_SENSORS_BT869=m +CONFIG_SENSORS_DDCMON=m +CONFIG_SENSORS_EEPROM=m +CONFIG_SENSORS_MATORB=m +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCF8591 is not set + +# +# Mice +# +CONFIG_BUSMOUSE=m +CONFIG_ATIXL_BUSMOUSE=m +CONFIG_LOGIBUSMOUSE=m +CONFIG_MS_BUSMOUSE=m 
+CONFIG_MOUSE=y +CONFIG_PSMOUSE=y +CONFIG_82C710_MOUSE=y +CONFIG_PC110_PAD=m +CONFIG_MK712_MOUSE=m + +# +# Joysticks +# +CONFIG_INPUT_GAMEPORT=m +CONFIG_INPUT_NS558=m +CONFIG_INPUT_LIGHTNING=m +CONFIG_INPUT_PCIGAME=m +CONFIG_INPUT_CS461X=m +CONFIG_INPUT_EMU10K1=m +CONFIG_INPUT_FM801=m +CONFIG_INPUT_SERIO=m +CONFIG_INPUT_SERPORT=m +CONFIG_INPUT_ANALOG=m +CONFIG_INPUT_A3D=m +CONFIG_INPUT_ADI=m +CONFIG_INPUT_COBRA=m +CONFIG_INPUT_GF2K=m +CONFIG_INPUT_GRIP=m +CONFIG_INPUT_INTERACT=m +CONFIG_INPUT_TMDC=m +CONFIG_INPUT_SIDEWINDER=m +CONFIG_INPUT_IFORCE_USB=m +CONFIG_INPUT_IFORCE_232=m +CONFIG_INPUT_WARRIOR=m +CONFIG_INPUT_MAGELLAN=m +CONFIG_INPUT_SPACEORB=m +CONFIG_INPUT_SPACEBALL=m +CONFIG_INPUT_STINGER=m +CONFIG_INPUT_DB9=m +CONFIG_INPUT_GAMECON=m +CONFIG_INPUT_TURBOGRAFX=m +CONFIG_QIC02_TAPE=m +CONFIG_QIC02_DYNCONF=y +CONFIG_IPMI_HANDLER=m +CONFIG_IPMI_PANIC_EVENT=y +CONFIG_IPMI_DEVICE_INTERFACE=m +CONFIG_IPMI_KCS=m +CONFIG_IPMI_WATCHDOG=m + +# +# Watchdog Cards +# +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +CONFIG_ACQUIRE_WDT=m +CONFIG_ADVANTECH_WDT=m +CONFIG_ALIM1535_WDT=m +CONFIG_ALIM7101_WDT=m +CONFIG_SC520_WDT=m +CONFIG_PCWATCHDOG=m +CONFIG_EUROTECH_WDT=m +CONFIG_IB700_WDT=m +CONFIG_WAFER_WDT=m +# CONFIG_I810_TCO is not set +CONFIG_MIXCOMWD=m +CONFIG_60XX_WDT=m +CONFIG_SC1200_WDT=m +CONFIG_SCx200_WDT=m +CONFIG_SOFT_WATCHDOG=m +CONFIG_W83877F_WDT=m +CONFIG_WDT=m +CONFIG_WDTPCI=m +CONFIG_WDT_501=y +CONFIG_WDT_501_FAN=y +CONFIG_MACHZ_WDT=m +CONFIG_DEADMAN=m +CONFIG_HANGCHECK_TIMER=m +CONFIG_AMD7XX_TCO=m +CONFIG_SCx200_GPIO=m +CONFIG_AMD_RNG=m +CONFIG_INTEL_RNG=m +CONFIG_AMD_PM768=m +CONFIG_NVRAM=m +CONFIG_RTC=y +CONFIG_DTLK=m +CONFIG_R3964=m +CONFIG_APPLICOM=m + +# +# Ftape, the floppy tape device driver +# +CONFIG_FTAPE=m +CONFIG_ZFTAPE=m +CONFIG_ZFT_DFLT_BLK_SZ=10240 +CONFIG_ZFT_COMPRESSOR=m +CONFIG_FT_NR_BUFFERS=3 +CONFIG_FT_PROC_FS=y +CONFIG_FT_NORMAL_DEBUG=y +# CONFIG_FT_FULL_DEBUG is not set +# CONFIG_FT_NO_TRACE is not set +# CONFIG_FT_NO_TRACE_AT_ALL is 
not set +CONFIG_FT_STD_FDC=y +# CONFIG_FT_MACH2 is not set +# CONFIG_FT_PROBE_FC10 is not set +# CONFIG_FT_ALT_FDC is not set +CONFIG_FT_FDC_THR=8 +CONFIG_FT_FDC_MAX_RATE=2000 +CONFIG_FT_ALPHA_CLOCK=0 +CONFIG_AGP=y +CONFIG_AGP_AMD_8151=y +# CONFIG_AGP_INTEL is not set +# CONFIG_AGP_I810 is not set +# CONFIG_AGP_VIA is not set +# CONFIG_AGP_AMD is not set +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_ALI is not set +# CONFIG_AGP_SWORKS is not set +# CONFIG_AGP_NVIDIA is not set +CONFIG_DRM=y +# CONFIG_DRM_OLD is not set +CONFIG_DRM_NEW=y +CONFIG_DRM_TDFX=m +CONFIG_DRM_R128=m +CONFIG_DRM_RADEON=m +CONFIG_DRM_I810=m +# CONFIG_DRM_I810_XFREE_41 is not set +CONFIG_DRM_I830=m +CONFIG_DRM_MGA=m +# CONFIG_DRM_SIS is not set + +# +# PCMCIA character devices +# +CONFIG_PCMCIA_SERIAL_CS=m +CONFIG_SYNCLINK_CS=m +CONFIG_MWAVE=m + +# +# Multimedia devices +# +CONFIG_VIDEO_DEV=m + +# +# Video For Linux +# +CONFIG_VIDEO_PROC_FS=y +CONFIG_I2C_PARPORT=m +CONFIG_VIDEO_BT848=m +CONFIG_VIDEO_PMS=m +CONFIG_VIDEO_BWQCAM=m +CONFIG_VIDEO_CQCAM=m +CONFIG_VIDEO_W9966=m +CONFIG_VIDEO_CPIA=m +CONFIG_VIDEO_CPIA_PP=m +CONFIG_VIDEO_CPIA_USB=m +CONFIG_VIDEO_SAA5249=m +CONFIG_TUNER_3036=m +CONFIG_VIDEO_STRADIS=m +CONFIG_VIDEO_ZORAN=m +CONFIG_VIDEO_ZORAN_BUZ=m +CONFIG_VIDEO_ZORAN_DC10=m +CONFIG_VIDEO_ZORAN_LML33=m +CONFIG_VIDEO_ZR36120=m +# CONFIG_VIDEO_MEYE is not set + +# +# Radio Adapters +# +CONFIG_RADIO_GEMTEK_PCI=m +CONFIG_RADIO_MAXIRADIO=m +CONFIG_RADIO_MAESTRO=m +CONFIG_RADIO_MIROPCM20=m +# CONFIG_DXR3 is not set + +# +# File systems +# +CONFIG_QUOTA=y +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_QIFACE_COMPAT=y +# CONFIG_QIFACE_V1 is not set +CONFIG_QIFACE_V2=y +CONFIG_AUTOFS_FS=m +CONFIG_AUTOFS4_FS=m +CONFIG_REISERFS_FS=m +# CONFIG_REISERFS_CHECK is not set +CONFIG_REISERFS_PROC_INFO=y +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_XATTR_USER=y +CONFIG_REISERFS_FS_XATTR_TRUSTED=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_ADFS_FS=m +# CONFIG_ADFS_FS_RW is not set +CONFIG_AFFS_FS=m 
+CONFIG_HFS_FS=m +CONFIG_HFSPLUS_FS=m +CONFIG_BEFS_FS=m +# CONFIG_BEFS_DEBUG is not set +CONFIG_BFS_FS=m +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_XATTR_SHARING=y +CONFIG_EXT3_FS_XATTR_USER=y +CONFIG_EXT3_FS_XATTR_TRUSTED=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_JBD=y +CONFIG_JBD_DEBUG=y +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_UMSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FATX_FS=m +CONFIG_EFS_FS=m +CONFIG_JFFS_FS=m +CONFIG_JFFS_FS_VERBOSE=0 +CONFIG_JFFS_PROC_FS=y +CONFIG_JFFS2_FS=m +CONFIG_JFFS2_FS_DEBUG=0 +CONFIG_CRAMFS=m +CONFIG_TMPFS=y +CONFIG_RAMFS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +# CONFIG_JFS_DEBUG is not set +CONFIG_JFS_STATISTICS=y +CONFIG_MINIX_FS=y +CONFIG_VXFS_FS=m +# CONFIG_NTFS_FS is not set +# CONFIG_NTFS_DEBUG is not set +# CONFIG_NTFS_RW is not set +CONFIG_HPFS_FS=m +CONFIG_PROC_FS=y +CONFIG_PROC_CONFIG=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVFS_MOUNT is not set +# CONFIG_DEVFS_DEBUG is not set +CONFIG_DEVPTS_FS=y +CONFIG_QNX4FS_FS=m +# CONFIG_QNX4FS_RW is not set +CONFIG_ROMFS_FS=m +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_XATTR_SHARING=y +CONFIG_EXT2_FS_XATTR_USER=y +CONFIG_EXT2_FS_XATTR_TRUSTED=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_SYSV_FS=m +CONFIG_UDF_FS=m +CONFIG_UDF_RW=y +CONFIG_UFS_FS=m +# CONFIG_UFS_FS_WRITE is not set +CONFIG_XFS_FS=m +CONFIG_XFS_POSIX_ACL=y +# CONFIG_XFS_RT is not set +CONFIG_XFS_QUOTA=y +CONFIG_XFS_DMAPI=y +# CONFIG_XFS_DEBUG is not set +# CONFIG_PAGEBUF_DEBUG is not set +# CONFIG_OCFS_FS is not set + +# +# Network File Systems +# +CONFIG_CODA_FS=m +CONFIG_INTERMEZZO_FS=m +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_ACL=y +CONFIG_NFS_DIRECTIO=y +CONFIG_ROOT_NFS=y +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +CONFIG_NFSD_ACL=y +CONFIG_NFSD_TCP=y +CONFIG_NFSD_FHALIAS=y +CONFIG_SUNRPC=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_CIFS=m +CONFIG_SMB_FS=m +CONFIG_SMB_NLS_DEFAULT=y +CONFIG_SMB_NLS_REMOTE="cp437" +CONFIG_NCP_FS=m 
+CONFIG_NCPFS_PACKET_SIGNING=y +CONFIG_NCPFS_IOCTL_LOCKING=y +CONFIG_NCPFS_STRONG=y +CONFIG_NCPFS_NFS_NS=y +CONFIG_NCPFS_OS2_NS=y +CONFIG_NCPFS_SMALLDOS=y +CONFIG_NCPFS_NLS=y +CONFIG_NCPFS_EXTRAS=y +CONFIG_ZISOFS_FS=y +CONFIG_FS_MBCACHE=y +CONFIG_FS_POSIX_ACL=y + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +CONFIG_OSF_PARTITION=y +# CONFIG_AMIGA_PARTITION is not set +CONFIG_ATARI_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_XBOX_PARTITION=y +CONFIG_MSDOS_PARTITION=y +CONFIG_BSD_DISKLABEL=y +# CONFIG_MINIX_SUBPARTITION is not set +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +CONFIG_ULTRIX_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_EFI_PARTITION=y +CONFIG_SMB_NLS=y +CONFIG_NLS=y + +# +# Native Language Support +# +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m + +# +# Console drivers +# +CONFIG_VGA_CONSOLE=y +CONFIG_VIDEO_SELECT=y +CONFIG_MDA_CONSOLE=m + +# +# Frame-buffer support +# +CONFIG_FB=y +CONFIG_DUMMY_CONSOLE=y 
+CONFIG_FB_RIVA=m +CONFIG_FB_CLGEN=m +CONFIG_FB_PM2=m +CONFIG_FB_PM2_FIFO_DISCONNECT=y +CONFIG_FB_PM2_PCI=y +CONFIG_FB_PM3=m +# CONFIG_UNICON is not set +# CONFIG_UNICON_GB is not set +# CONFIG_UNICON_GBK is not set +# CONFIG_UNICON_BIG5 is not set +# CONFIG_UNICON_JIS is not set +# CONFIG_UNICON_KSCM is not set +CONFIG_FB_CYBER2000=m +CONFIG_FB_VESA=y +CONFIG_FB_VGA16=m +CONFIG_FB_HGA=m +CONFIG_VIDEO_SELECT=y +CONFIG_FB_MATROX=m +CONFIG_FB_MATROX_MILLENIUM=y +CONFIG_FB_MATROX_MYSTIQUE=y +CONFIG_FB_MATROX_G450=y +CONFIG_FB_MATROX_G100=y +CONFIG_FB_MATROX_I2C=m +CONFIG_FB_MATROX_MAVEN=m +CONFIG_FB_MATROX_PROC=m +CONFIG_FB_MATROX_MULTIHEAD=y +CONFIG_FB_ATY=m +CONFIG_FB_ATY_GX=y +CONFIG_FB_ATY_CT=y +CONFIG_FB_RADEON=m +CONFIG_FB_ATY128=m +# CONFIG_FB_INTEL is not set +# CONFIG_FB_SIS is not set +CONFIG_FB_NEOMAGIC=m +CONFIG_FB_VMWARE_SVGA=m +CONFIG_FB_3DFX=m +CONFIG_FB_VOODOO1=m +CONFIG_FB_TRIDENT=m +# CONFIG_FB_VIRTUAL is not set +CONFIG_FBCON_SPLASHSCREEN=y +CONFIG_FBCON_CFB16=y +CONFIG_FBCON_ADVANCED=y +CONFIG_FBCON_MFB=m +CONFIG_FBCON_CFB2=m +CONFIG_FBCON_CFB4=m +CONFIG_FBCON_CFB8=y +CONFIG_FBCON_CFB24=y +CONFIG_FBCON_CFB32=y +CONFIG_FBCON_AFB=m +CONFIG_FBCON_ILBM=m +CONFIG_FBCON_IPLAN2P2=m +CONFIG_FBCON_IPLAN2P4=m +CONFIG_FBCON_IPLAN2P8=m +CONFIG_FBCON_MAC=m +CONFIG_FBCON_VGA_PLANES=m +CONFIG_FBCON_VGA=m +CONFIG_FBCON_HGA=m +# CONFIG_FBCON_FONTWIDTH8_ONLY is not set +CONFIG_FBCON_FONTS=y +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y +# CONFIG_FONT_SUN8x16 is not set +# CONFIG_FONT_SUN12x22 is not set +# CONFIG_FONT_6x11 is not set +# CONFIG_FONT_PEARL_8x8 is not set +# CONFIG_FONT_ACORN_8x8 is not set + +# +# Sound +# +CONFIG_SOUND=m +CONFIG_SOUND_ALI5455=m +CONFIG_SOUND_BT878=m +CONFIG_SOUND_CMPCI=m +CONFIG_SOUND_CMPCI_FM=y +CONFIG_SOUND_CMPCI_FMIO=388 +CONFIG_SOUND_CMPCI_FMIO=388 +CONFIG_SOUND_CMPCI_MIDI=y +CONFIG_SOUND_CMPCI_MPUIO=330 +CONFIG_SOUND_CMPCI_JOYSTICK=y +CONFIG_SOUND_CMPCI_CM8738=y +# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set 
+CONFIG_SOUND_CMPCI_SPDIFLOOP=y +CONFIG_SOUND_CMPCI_SPEAKERS=2 +CONFIG_SOUND_EMU10K1=m +CONFIG_MIDI_EMU10K1=y +CONFIG_SOUND_FUSION=m +CONFIG_SOUND_CS4281=m +CONFIG_SOUND_ES1370=m +CONFIG_SOUND_ES1371=m +CONFIG_SOUND_ESSSOLO1=m +CONFIG_SOUND_MAESTRO=m +CONFIG_SOUND_MAESTRO3=m +CONFIG_SOUND_FORTE=m +CONFIG_SOUND_ICH=m +CONFIG_SOUND_RME96XX=m +CONFIG_SOUND_SONICVIBES=m +CONFIG_SOUND_TRIDENT=m +# CONFIG_SOUND_MSNDCLAS is not set +# CONFIG_SOUND_MSNDPIN is not set +CONFIG_SOUND_VIA82CXXX=m +CONFIG_MIDI_VIA82CXXX=y +CONFIG_SOUND_OSS=m +CONFIG_SOUND_TRACEINIT=y +CONFIG_SOUND_DMAP=y +CONFIG_SOUND_AD1816=m +CONFIG_SOUND_AD1889=m +CONFIG_SOUND_SGALAXY=m +CONFIG_SOUND_ADLIB=m +CONFIG_SOUND_ACI_MIXER=m +CONFIG_SOUND_CS4232=m +CONFIG_SOUND_SSCAPE=m +CONFIG_SOUND_GUS=m +# CONFIG_SOUND_GUS16 is not set +CONFIG_SOUND_GUSMAX=y +CONFIG_SOUND_VMIDI=m +CONFIG_SOUND_TRIX=m +CONFIG_SOUND_MSS=m +CONFIG_SOUND_MPU401=m +CONFIG_SOUND_NM256=m +CONFIG_SOUND_MAD16=m +CONFIG_MAD16_OLDCARD=y +CONFIG_SOUND_PAS=m +# CONFIG_PAS_JOYSTICK is not set +CONFIG_SOUND_PSS=m +CONFIG_PSS_MIXER=y +# CONFIG_PSS_HAVE_BOOT is not set +CONFIG_SOUND_SB=m +CONFIG_SOUND_AWE32_SYNTH=m +CONFIG_SOUND_KAHLUA=m +CONFIG_SOUND_WAVEFRONT=m +CONFIG_SOUND_MAUI=m +CONFIG_SOUND_YM3812=m +CONFIG_SOUND_OPL3SA1=m +CONFIG_SOUND_OPL3SA2=m +CONFIG_SOUND_YMFPCI=m +CONFIG_SOUND_YMFPCI_LEGACY=y +CONFIG_SOUND_UART6850=m +CONFIG_SOUND_AEDSP16=m +CONFIG_SC6600=y +CONFIG_SC6600_JOY=y +CONFIG_SC6600_CDROM=4 +CONFIG_SC6600_CDROMBASE=0 +CONFIG_AEDSP16_SBPRO=y +CONFIG_AEDSP16_MPU401=y +CONFIG_SOUND_TVMIXER=m + +# +# Advanced Linux Sound Architecture +# +CONFIG_SND=m +CONFIG_SND_BIT32_EMUL=m +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_VERBOSE_PRINTK=y +CONFIG_SND_DEBUG=y +CONFIG_SND_DEBUG_MEMORY=y +# CONFIG_SND_DEBUG_DETECT is not set + +# +# Generic devices +# +CONFIG_SND_DUMMY=m +CONFIG_SND_VIRMIDI=m +CONFIG_SND_MTPAV=m 
+CONFIG_SND_SERIAL_U16550=m +CONFIG_SND_MPU401=m +CONFIG_SND_SERIALMIDI=m + +# +# PCI devices +# +CONFIG_SND_ALI5451=m +CONFIG_SND_CS46XX=m +CONFIG_SND_CS46XX_NEW_DSP=y +CONFIG_SND_CS4281=m +CONFIG_SND_EMU10K1=m +CONFIG_SND_PDPLUS=m +CONFIG_SND_KORG1212=m +CONFIG_SND_NM256=m +CONFIG_SND_RME32=m +CONFIG_SND_RME96=m +CONFIG_SND_RME9652=m +CONFIG_SND_HDSP=m +CONFIG_SND_TRIDENT=m +CONFIG_SND_YMFPCI=m +CONFIG_SND_ALS4000=m +CONFIG_SND_CMIPCI=m +CONFIG_SND_ENS1370=m +CONFIG_SND_ENS1371=m +CONFIG_SND_ES1938=m +CONFIG_SND_ES1968=m +CONFIG_SND_MAESTRO3=m +CONFIG_SND_FM801=m +CONFIG_SND_ICE1712=m +CONFIG_SND_ICE1724=m +CONFIG_SND_INTEL8X0=m +CONFIG_SND_SONICVIBES=m +CONFIG_SND_VIA82XX=m +CONFIG_SND_VX222=m +CONFIG_SND_MIXART=m +CONFIG_SND_AZT3328=m + +# +# ALSA USB devices +# +CONFIG_SND_USB_AUDIO=m + +# +# ALSA PCMCIA devices +# +CONFIG_SND_VXPOCKET=m +CONFIG_SND_VXP440=m + +# +# USB support +# +CONFIG_USB=m +# CONFIG_USB_DEBUG is not set +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_BANDWIDTH is not set +CONFIG_USB_EHCI_HCD=m +CONFIG_USB_UHCI=m +CONFIG_USB_UHCI_ALT=m +CONFIG_USB_OHCI=m +CONFIG_USB_AUDIO=m +CONFIG_USB_EMI26=m +CONFIG_USB_MIDI=m +CONFIG_USB_STORAGE=m +# CONFIG_USB_STORAGE_DEBUG is not set +CONFIG_USB_STORAGE_DATAFAB=y +CONFIG_USB_STORAGE_FREECOM=y +CONFIG_USB_STORAGE_ISD200=y +CONFIG_USB_STORAGE_DPCM=y +CONFIG_USB_STORAGE_HP8200e=y +CONFIG_USB_STORAGE_SDDR09=y +CONFIG_USB_STORAGE_SDDR55=y +CONFIG_USB_STORAGE_JUMPSHOT=y +CONFIG_USB_ACM=m +CONFIG_USB_PRINTER=m +CONFIG_USB_HID=m +CONFIG_USB_HIDINPUT=y +CONFIG_USB_HIDDEV=y +# CONFIG_USB_KBD is not set +# CONFIG_USB_MOUSE is not set +CONFIG_USB_AIPTEK=m +CONFIG_USB_WACOM=m +CONFIG_USB_KBTAB=m +CONFIG_USB_POWERMATE=m +CONFIG_USB_DC2XX=m +CONFIG_USB_MDC800=m +CONFIG_USB_SCANNER=m +CONFIG_USB_MICROTEK=m +CONFIG_USB_HPUSBSCSI=m +CONFIG_USB_IBMCAM=m +CONFIG_USB_KONICAWC=m +CONFIG_USB_OV511=m +CONFIG_USB_PWC=m +CONFIG_USB_SE401=m +CONFIG_USB_STV680=m +CONFIG_USB_VICAM=m +CONFIG_USB_DSBR=m +CONFIG_USB_DABUSB=m +# 
CONFIG_USB_LOGITECH_CAM is not set +CONFIG_USB_PEGASUS=m +CONFIG_USB_RTL8150=m +CONFIG_USB_KAWETH=m +CONFIG_USB_CATC=m +CONFIG_USB_AX8817X=m +CONFIG_USB_CDCETHER=m +CONFIG_USB_USBNET=m +CONFIG_USB_USBDNET=m +CONFIG_USB_USBDNET_VENDOR=0000 +CONFIG_USB_USBDNET_PRODUCT=0000 +CONFIG_USB_USBDNET_CLASS=0000 +CONFIG_USB_USBDNET_SUBCLASS=0000 +CONFIG_USB_USS720=m + +# +# USB Serial Converter support +# +CONFIG_USB_SERIAL=m +# CONFIG_USB_SERIAL_DEBUG is not set +CONFIG_USB_SERIAL_GENERIC=y +CONFIG_USB_SERIAL_BELKIN=m +CONFIG_USB_SERIAL_WHITEHEAT=m +CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m +CONFIG_USB_SERIAL_EMPEG=m +CONFIG_USB_SERIAL_FTDI_SIO=m +CONFIG_USB_SERIAL_VISOR=m +CONFIG_USB_SERIAL_IPAQ=m +CONFIG_USB_SERIAL_IR=m +CONFIG_USB_SERIAL_EDGEPORT=m +CONFIG_USB_SERIAL_EDGEPORT_TI=m +CONFIG_USB_SERIAL_KEYSPAN_PDA=m +CONFIG_USB_SERIAL_KEYSPAN=m +CONFIG_USB_SERIAL_KEYSPAN_USA28=y +CONFIG_USB_SERIAL_KEYSPAN_USA28X=y +CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y +CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y +CONFIG_USB_SERIAL_KEYSPAN_USA19=y +CONFIG_USB_SERIAL_KEYSPAN_USA18X=y +CONFIG_USB_SERIAL_KEYSPAN_USA19W=y +CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y +CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y +CONFIG_USB_SERIAL_KEYSPAN_MPR=y +CONFIG_USB_SERIAL_KEYSPAN_USA49W=y +CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y +CONFIG_USB_SERIAL_MCT_U232=m +CONFIG_USB_SERIAL_KLSI=m +CONFIG_USB_SERIAL_KOBIL_SCT=m +CONFIG_USB_SERIAL_PL2303=m +CONFIG_USB_SERIAL_CYBERJACK=m +CONFIG_USB_SERIAL_XIRCOM=m +CONFIG_USB_SERIAL_OMNINET=m +CONFIG_USB_SERIAL_SAFE_PADDED=y +CONFIG_USB_SAFE_SERIAL_VENDOR=0000 +CONFIG_USB_SAFE_SERIAL_PRODUCT=0000 +CONFIG_USB_RIO500=m +CONFIG_USB_AUERSWALD=m +CONFIG_USB_TIGL=m +CONFIG_USB_BRLVGER=m +CONFIG_USB_LCD=m +# CONFIG_USB_SPEEDTOUCH is not set + +# +# Bluetooth support +# +CONFIG_BLUEZ=m +CONFIG_BLUEZ_L2CAP=m +CONFIG_BLUEZ_SCO=m +CONFIG_BLUEZ_RFCOMM=m +CONFIG_BLUEZ_RFCOMM_TTY=y +CONFIG_BLUEZ_BNEP=m +CONFIG_BLUEZ_BNEP_MC_FILTER=y +CONFIG_BLUEZ_BNEP_PROTO_FILTER=y +CONFIG_BLUEZ_CMTP=m + +# +# Bluetooth device drivers 
+# +CONFIG_BLUEZ_HCIUSB=m +CONFIG_BLUEZ_USB_SCO=y +# CONFIG_BLUEZ_USB_ZERO_PACKET is not set +CONFIG_BLUEZ_HCIUART=m +CONFIG_BLUEZ_HCIUART_H4=y +CONFIG_BLUEZ_HCIUART_BCSP=y +CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y +CONFIG_BLUEZ_HCIBFUSB=m +CONFIG_BLUEZ_HCIDTL1=m +CONFIG_BLUEZ_HCIBT3C=m +CONFIG_BLUEZ_HCIBLUECARD=m +CONFIG_BLUEZ_HCIBTUART=m +CONFIG_BLUEZ_HCIVHCI=m + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SLAB is not set +CONFIG_MAGIC_SYSRQ=y +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_CHECKING is not set +# CONFIG_INIT_DEBUG is not set +# CONFIG_X86_REMOTE_DEBUG is not set +# CONFIG_IOMMU_DEBUG is not set +# CONFIG_IOMMU_LEAK is not set +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_KALLSYMS=y + +# +# Library routines +# +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m +CONFIG_QSORT=y +CONFIG_FW_LOADER=m + +# +# Build options +# +CONFIG_SUSE_KERNEL=y +CONFIG_CFGNAME="default" +CONFIG_RELEASE=171 diff --git a/lustre/kernel_patches/patches/2.6.0-mm2.patch b/lustre/kernel_patches/patches/2.6.0-mm2.patch deleted file mode 100644 index d01f3c7..0000000 --- a/lustre/kernel_patches/patches/2.6.0-mm2.patch +++ /dev/null @@ -1,203869 +0,0 @@ ---- linux-2.6.0/arch/alpha/kernel/irq.c 2003-10-08 15:07:08.000000000 -0700 -+++ 25/arch/alpha/kernel/irq.c 2003-12-28 23:22:10.000000000 -0800 -@@ -252,9 +252,11 @@ static int - irq_affinity_read_proc (char *page, char **start, off_t off, - int count, int *eof, void *data) - { -- if (count < HEX_DIGITS+1) -+ int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); -+ if (count - len < 2) - return -EINVAL; -- return sprintf (page, "%016lx\n", irq_affinity[(long)data]); -+ len += sprintf(page + len, "\n"); -+ return len; - } - - static unsigned int -@@ -331,10 +333,11 @@ static int - prof_cpu_mask_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) - { -- unsigned long *mask = (unsigned long *) data; -- if (count < HEX_DIGITS+1) -+ int len = cpumask_snprintf(page, count, *(cpumask_t *)data); 
-+ if (count - len < 2) - return -EINVAL; -- return sprintf (page, "%016lx\n", *mask); -+ len += sprintf(page + len, "\n"); -+ return len; - } - - static int -@@ -529,19 +532,21 @@ show_interrupts(struct seq_file *p, void - #ifdef CONFIG_SMP - int j; - #endif -- int i; -+ int i = *(loff_t *) v; - struct irqaction * action; - unsigned long flags; - - #ifdef CONFIG_SMP -- seq_puts(p, " "); -- for (i = 0; i < NR_CPUS; i++) -- if (cpu_online(i)) -- seq_printf(p, "CPU%d ", i); -- seq_putc(p, '\n'); -+ if (i == 0) { -+ seq_puts(p, " "); -+ for (i = 0; i < NR_CPUS; i++) -+ if (cpu_online(i)) -+ seq_printf(p, "CPU%d ", i); -+ seq_putc(p, '\n'); -+ } - #endif - -- for (i = 0; i < ACTUAL_NR_IRQS; i++) { -+ if (i < ACTUAL_NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); - action = irq_desc[i].action; - if (!action) -@@ -568,15 +573,16 @@ show_interrupts(struct seq_file *p, void - seq_putc(p, '\n'); - unlock: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); -- } -+ } else if (i == ACTUAL_NR_IRQS) { - #ifdef CONFIG_SMP -- seq_puts(p, "IPI: "); -- for (i = 0; i < NR_CPUS; i++) -- if (cpu_online(i)) -- seq_printf(p, "%10lu ", cpu_data[i].ipi_count); -- seq_putc(p, '\n'); -+ seq_puts(p, "IPI: "); -+ for (i = 0; i < NR_CPUS; i++) -+ if (cpu_online(i)) -+ seq_printf(p, "%10lu ", cpu_data[i].ipi_count); -+ seq_putc(p, '\n'); - #endif -- seq_printf(p, "ERR: %10lu\n", irq_err_count); -+ seq_printf(p, "ERR: %10lu\n", irq_err_count); -+ } - return 0; - } - ---- linux-2.6.0/arch/alpha/kernel/traps.c 2003-10-08 15:07:08.000000000 -0700 -+++ 25/arch/alpha/kernel/traps.c 2003-12-28 23:22:11.000000000 -0800 -@@ -636,6 +636,7 @@ do_entUna(void * va, unsigned long opcod - lock_kernel(); - printk("Bad unaligned kernel access at %016lx: %p %lx %ld\n", - pc, va, opcode, reg); -+ dump_stack(); - do_exit(SIGSEGV); - - got_exception: ---- linux-2.6.0/arch/arm26/kernel/irq.c 2003-10-08 15:07:08.000000000 -0700 -+++ 25/arch/arm26/kernel/irq.c 2003-12-28 23:22:06.000000000 -0800 -@@ -135,10 
+135,10 @@ void enable_irq(unsigned int irq) - - int show_interrupts(struct seq_file *p, void *v) - { -- int i; -+ int i = *(loff_t *) v; - struct irqaction * action; - -- for (i = 0 ; i < NR_IRQS ; i++) { -+ if (i < NR_IRQS) { - action = irq_desc[i].action; - if (!action) - continue; -@@ -148,10 +148,10 @@ int show_interrupts(struct seq_file *p, - seq_printf(p, ", %s", action->name); - } - seq_putc(p, '\n'); -+ } else if (i == NR_IRQS) { -+ show_fiq_list(p, v); -+ seq_printf(p, "Err: %10lu\n", irq_err_count); - } -- -- show_fiq_list(p, v); -- seq_printf(p, "Err: %10lu\n", irq_err_count); - return 0; - } - ---- linux-2.6.0/arch/arm/kernel/irq.c 2003-10-08 15:07:08.000000000 -0700 -+++ 25/arch/arm/kernel/irq.c 2003-12-28 23:22:06.000000000 -0800 -@@ -169,11 +169,11 @@ void disable_irq_wake(unsigned int irq) - - int show_interrupts(struct seq_file *p, void *v) - { -- int i; -+ int i = *(loff_t *) v; - struct irqaction * action; - unsigned long flags; - -- for (i = 0 ; i < NR_IRQS ; i++) { -+ if (i < NR_IRQS) { - spin_lock_irqsave(&irq_controller_lock, flags); - action = irq_desc[i].action; - if (!action) -@@ -187,12 +187,12 @@ int show_interrupts(struct seq_file *p, - seq_putc(p, '\n'); - unlock: - spin_unlock_irqrestore(&irq_controller_lock, flags); -- } -- -+ } else if (i == NR_IRQS) { - #ifdef CONFIG_ARCH_ACORN -- show_fiq_list(p, v); -+ show_fiq_list(p, v); - #endif -- seq_printf(p, "Err: %10lu\n", irq_err_count); -+ seq_printf(p, "Err: %10lu\n", irq_err_count); -+ } - return 0; - } - ---- linux-2.6.0/arch/arm/mach-sa1100/Kconfig 2003-06-14 12:18:52.000000000 -0700 -+++ 25/arch/arm/mach-sa1100/Kconfig 2003-12-28 23:23:06.000000000 -0800 -@@ -304,7 +304,7 @@ config SA1100_YOPY - depends on ARCH_SA1100 - help - Say Y here to support the Yopy PDA. Product information at -- . See Documentation/arm/SA110/Yopy -+ . See Documentation/arm/SA1100/Yopy - for more. 
- - config SA1100_STORK ---- linux-2.6.0/arch/arm/Makefile 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/arm/Makefile 2003-12-28 23:21:55.000000000 -0800 -@@ -14,8 +14,6 @@ OBJCOPYFLAGS :=-O binary -R .note -R .co - GZFLAGS :=-9 - #CFLAGS +=-pipe - --CFLAGS :=$(CFLAGS:-O2=-Os) -- - ifeq ($(CONFIG_FRAME_POINTER),y) - CFLAGS +=-fno-omit-frame-pointer -mapcs -mno-sched-prolog - endif ---- linux-2.6.0/arch/cris/kernel/irq.c 2003-10-08 15:07:08.000000000 -0700 -+++ 25/arch/cris/kernel/irq.c 2003-12-28 23:22:06.000000000 -0800 -@@ -89,11 +89,11 @@ static struct irqaction *irq_action[NR_I - - int show_interrupts(struct seq_file *p, void *v) - { -- int i; -+ int i = *(loff_t *) v; - struct irqaction * action; - unsigned long flags; - -- for (i = 0; i < NR_IRQS; i++) { -+ if (i < NR_IRQS) { - local_irq_save(flags); - action = irq_action[i]; - if (!action) ---- linux-2.6.0/arch/h8300/Kconfig 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/h8300/Kconfig 2003-12-28 23:21:55.000000000 -0800 -@@ -5,6 +5,10 @@ - - mainmenu "uClinux/h8300 (w/o MMU) Kernel Configuration" - -+config H8300 -+ bool -+ default y -+ - config MMU - bool - default n ---- linux-2.6.0/arch/h8300/Makefile 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/h8300/Makefile 2003-12-28 23:21:55.000000000 -0800 -@@ -34,7 +34,7 @@ cflags-$(CONFIG_CPU_H8S) := -ms - ldflags-$(CONFIG_CPU_H8S) := -mh8300self - - CFLAGS += $(cflags-y) --CFLAGS += -mint32 -fno-builtin -Os -+CFLAGS += -mint32 -fno-builtin - CFLAGS += -g - CFLAGS += -D__linux__ - CFLAGS += -DUTS_SYSNAME=\"uClinux\" ---- linux-2.6.0/arch/h8300/platform/h8300h/ints.c 2003-10-08 15:07:08.000000000 -0700 -+++ 25/arch/h8300/platform/h8300h/ints.c 2003-12-28 23:22:06.000000000 -0800 -@@ -228,9 +228,9 @@ asmlinkage void process_int(int vec, str - - int show_interrupts(struct seq_file *p, void *v) - { -- int i; -+ int i = *(loff_t *) v; - -- for (i = 0; i < NR_IRQS; i++) { -+ if (i < NR_IRQS) { - if (irq_list[i]) { - seq_printf(p, "%3d: %10u 
",i,irq_list[i]->count); - seq_printf(p, "%s\n", irq_list[i]->devname); ---- linux-2.6.0/arch/h8300/platform/h8s/ints.c 2003-10-17 15:58:03.000000000 -0700 -+++ 25/arch/h8300/platform/h8s/ints.c 2003-12-28 23:22:06.000000000 -0800 -@@ -280,9 +280,9 @@ asmlinkage void process_int(unsigned lon - - int show_interrupts(struct seq_file *p, void *v) - { -- int i; -+ int i = *(loff_t *) v; - -- for (i = 0; i < NR_IRQS; i++) { -+ if (i < NR_IRQS) { - if (irq_list[i]) { - seq_printf(p, "%3d: %10u ",i,irq_list[i]->count); - seq_printf(p, "%s\n", irq_list[i]->devname); ---- linux-2.6.0/arch/i386/boot/setup.S 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/boot/setup.S 2003-12-28 23:26:36.000000000 -0800 -@@ -162,7 +162,7 @@ cmd_line_ptr: .long 0 # (Header versio - # can be located anywhere in - # low memory 0x10000 or higher. - --ramdisk_max: .long MAXMEM-1 # (Header version 0x0203 or later) -+ramdisk_max: .long __MAXMEM-1 # (Header version 0x0203 or later) - # The highest safe address for - # the contents of an initrd - ---- linux-2.6.0/arch/i386/Kconfig 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/Kconfig 2003-12-28 23:26:36.000000000 -0800 -@@ -115,10 +115,15 @@ config ACPI_SRAT - default y - depends on NUMA && (X86_SUMMIT || X86_GENERICARCH) - -+config X86_SUMMIT_NUMA -+ bool -+ default y -+ depends on NUMA && (X86_SUMMIT || X86_GENERICARCH) -+ - config X86_CYCLONE_TIMER -- bool -- default y -- depends on X86_SUMMIT || X86_GENERICARCH -+ bool -+ default y -+ depends on X86_SUMMIT || X86_GENERICARCH - - config ES7000_CLUSTERED_APIC - bool -@@ -397,6 +402,54 @@ config X86_OOSTORE - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 - default y - -+config X86_4G -+ bool "4 GB kernel-space and 4 GB user-space virtual memory support" -+ help -+ This option is only useful for systems that have more than 1 GB -+ of RAM. -+ -+ The default kernel VM layout leaves 1 GB of virtual memory for -+ kernel-space mappings, and 3 GB of VM for user-space applications. 
-+ This option ups both the kernel-space VM and the user-space VM to -+ 4 GB. -+ -+ The cost of this option is additional TLB flushes done at -+ system-entry points that transition from user-mode into kernel-mode. -+ I.e. system calls and page faults, and IRQs that interrupt user-mode -+ code. There's also additional overhead to kernel operations that copy -+ memory to/from user-space. The overhead from this is hard to tell and -+ depends on the workload - it can be anything from no visible overhead -+ to 20-30% overhead. A good rule of thumb is to count with a runtime -+ overhead of 20%. -+ -+ The upside is the much increased kernel-space VM, which more than -+ quadruples the maximum amount of RAM supported. Kernels compiled with -+ this option boot on 64GB of RAM and still have more than 3.1 GB of -+ 'lowmem' left. Another bonus is that highmem IO bouncing decreases, -+ if used with drivers that still use bounce-buffers. -+ -+ There's also a 33% increase in user-space VM size - database -+ applications might see a boost from this. -+ -+ But the cost of the TLB flushes and the runtime overhead has to be -+ weighed against the bonuses offered by the larger VM spaces. The -+ dividing line depends on the actual workload - there might be 4 GB -+ systems that benefit from this option. Systems with less than 4 GB -+ of RAM will rarely see a benefit from this option - but it's not -+ out of question, the exact circumstances have to be considered. -+ -+config X86_SWITCH_PAGETABLES -+ def_bool X86_4G -+ -+config X86_4G_VM_LAYOUT -+ def_bool X86_4G -+ -+config X86_UACCESS_INDIRECT -+ def_bool X86_4G -+ -+config X86_HIGH_ENTRY -+ def_bool X86_4G -+ - config HPET_TIMER - bool "HPET Timer Support" - help -@@ -784,6 +837,25 @@ config MTRR - - See for more information. 
- -+config EFI -+ bool "Boot from EFI support (EXPERIMENTAL)" -+ depends on ACPI -+ default n -+ ---help--- -+ -+ This enables the the kernel to boot on EFI platforms using -+ system configuration information passed to it from the firmware. -+ This also enables the kernel to use any EFI runtime services that are -+ available (such as the EFI variable services). -+ -+ This option is only useful on systems that have EFI firmware -+ and will result in a kernel image that is ~8k larger. In addition, -+ you must use the latest ELILO loader available at -+ ftp.hpl.hp.com/pub/linux-ia64/ in order to take advantage of kernel -+ initialization using EFI information (neither GRUB nor LILO know -+ anything about EFI). However, even with this option, the resultant -+ kernel should continue to boot on existing non-EFI platforms. -+ - config HAVE_DEC_LOCK - bool - depends on (SMP || PREEMPT) && X86_CMPXCHG -@@ -793,7 +865,7 @@ config HAVE_DEC_LOCK - # Summit needs it only when NUMA is on - config BOOT_IOREMAP - bool -- depends on ((X86_SUMMIT || X86_GENERICARCH) && NUMA) -+ depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) - default y - - endmenu -@@ -1030,6 +1102,25 @@ config PCI_DIRECT - depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS) - default y - -+config PCI_USE_VECTOR -+ bool "Vector-based interrupt indexing" -+ depends on X86_LOCAL_APIC -+ default n -+ help -+ This replaces the current existing IRQ-based index interrupt scheme -+ with the vector-base index scheme. The advantages of vector base -+ over IRQ base are listed below: -+ 1) Support MSI implementation. -+ 2) Support future IOxAPIC hotplug -+ -+ Note that this enables MSI, Message Signaled Interrupt, on all -+ MSI capable device functions detected if users also install the -+ MSI patch. Message Signal Interrupt enables an MSI-capable -+ hardware device to send an inbound Memory Write on its PCI bus -+ instead of asserting IRQ signal on device IRQ pin. 
-+ -+ If you don't know what to do here, say N. -+ - source "drivers/pci/Kconfig" - - config ISA -@@ -1187,6 +1278,15 @@ config DEBUG_PAGEALLOC - This results in a large slowdown, but helps to find certain types - of memory corruptions. - -+config SPINLINE -+ bool "Spinlock inlining" -+ depends on DEBUG_KERNEL -+ help -+ This will change spinlocks from out of line to inline, making them -+ account cost to the callers in readprofile, rather than the lock -+ itself (as ".text.lock.filename"). This can be helpful for finding -+ the callers of locks. -+ - config DEBUG_HIGHMEM - bool "Highmem debugging" - depends on DEBUG_KERNEL && HIGHMEM -@@ -1203,20 +1303,208 @@ config DEBUG_INFO - Say Y here only if you plan to use gdb to debug the kernel. - If you don't debug the kernel, you can say N. - -+config LOCKMETER -+ bool "Kernel lock metering" -+ depends on SMP -+ help -+ Say Y to enable kernel lock metering, which adds overhead to SMP locks, -+ but allows you to see various statistics using the lockstat command. -+ - config DEBUG_SPINLOCK_SLEEP - bool "Sleep-inside-spinlock checking" - help - If you say Y here, various routines which may sleep will become very - noisy if they are called with a spinlock held. - -+config KGDB -+ bool "Include kgdb kernel debugger" -+ depends on DEBUG_KERNEL -+ help -+ If you say Y here, the system will be compiled with the debug -+ option (-g) and a debugging stub will be included in the -+ kernel. This stub communicates with gdb on another (host) -+ computer via a serial port. The host computer should have -+ access to the kernel binary file (vmlinux) and a serial port -+ that is connected to the target machine. Gdb can be made to -+ configure the serial port or you can use stty and setserial to -+ do this. See the 'target' command in gdb. This option also -+ configures in the ability to request a breakpoint early in the -+ boot process. To request the breakpoint just include 'kgdb' -+ as a boot option when booting the target machine. 
The system -+ will then break as soon as it looks at the boot options. This -+ option also installs a breakpoint in panic and sends any -+ kernel faults to the debugger. For more information see the -+ Documentation/i386/kgdb.txt file. -+ -+choice -+ depends on KGDB -+ prompt "Debug serial port BAUD" -+ default KGDB_115200BAUD -+ help -+ Gdb and the kernel stub need to agree on the baud rate to be -+ used. Some systems (x86 family at this writing) allow this to -+ be configured. -+ -+config KGDB_9600BAUD -+ bool "9600" -+ -+config KGDB_19200BAUD -+ bool "19200" -+ -+config KGDB_38400BAUD -+ bool "38400" -+ -+config KGDB_57600BAUD -+ bool "57600" -+ -+config KGDB_115200BAUD -+ bool "115200" -+endchoice -+ -+config KGDB_PORT -+ hex "hex I/O port address of the debug serial port" -+ depends on KGDB -+ default 3f8 -+ help -+ Some systems (x86 family at this writing) allow the port -+ address to be configured. The number entered is assumed to be -+ hex, don't put 0x in front of it. The standard address are: -+ COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx -+ will tell you what you have. It is good to test the serial -+ connection with a live system before trying to debug. -+ -+config KGDB_IRQ -+ int "IRQ of the debug serial port" -+ depends on KGDB -+ default 4 -+ help -+ This is the irq for the debug port. If everything is working -+ correctly and the kernel has interrupts on a control C to the -+ port should cause a break into the kernel debug stub. -+ -+config DEBUG_INFO -+ bool -+ depends on KGDB -+ default y -+ -+config KGDB_MORE -+ bool "Add any additional compile options" -+ depends on KGDB -+ default n -+ help -+ Saying yes here turns on the ability to enter additional -+ compile options. -+ -+ -+config KGDB_OPTIONS -+ depends on KGDB_MORE -+ string "Additional compile arguments" -+ default "-O1" -+ help -+ This option allows you enter additional compile options for -+ the whole kernel compile. 
Each platform will have a default -+ that seems right for it. For example on PPC "-ggdb -O1", and -+ for i386 "-O1". Note that by configuring KGDB "-g" is already -+ turned on. In addition, on i386 platforms -+ "-fomit-frame-pointer" is deleted from the standard compile -+ options. -+ -+config NO_KGDB_CPUS -+ int "Number of CPUs" -+ depends on KGDB && SMP -+ default NR_CPUS -+ help -+ -+ This option sets the number of cpus for kgdb ONLY. It is used -+ to prune some internal structures so they look "nice" when -+ displayed with gdb. This is to overcome possibly larger -+ numbers that may have been entered above. Enter the real -+ number to get nice clean kgdb_info displays. -+ -+config KGDB_TS -+ bool "Enable kgdb time stamp macros?" -+ depends on KGDB -+ default n -+ help -+ Kgdb event macros allow you to instrument your code with calls -+ to the kgdb event recording function. The event log may be -+ examined with gdb at a break point. Turning on this -+ capability also allows you to choose how many events to -+ keep. Kgdb always keeps the lastest events. -+ -+choice -+ depends on KGDB_TS -+ prompt "Max number of time stamps to save?" -+ default KGDB_TS_128 -+ -+config KGDB_TS_64 -+ bool "64" -+ -+config KGDB_TS_128 -+ bool "128" -+ -+config KGDB_TS_256 -+ bool "256" -+ -+config KGDB_TS_512 -+ bool "512" -+ -+config KGDB_TS_1024 -+ bool "1024" -+ -+endchoice -+ -+config STACK_OVERFLOW_TEST -+ bool "Turn on kernel stack overflow testing?" -+ depends on KGDB -+ default n -+ help -+ This option enables code in the front line interrupt handlers -+ to check for kernel stack overflow on interrupts and system -+ calls. This is part of the kgdb code on x86 systems. -+ -+config KGDB_CONSOLE -+ bool "Enable serial console thru kgdb port" -+ depends on KGDB -+ default n -+ help -+ This option enables the command line "console=kgdb" option. 
-+ When the system is booted with this option in the command line -+ all kernel printk output is sent to gdb (as well as to other -+ consoles). For this to work gdb must be connected. For this -+ reason, this command line option will generate a breakpoint if -+ gdb has not yet connected. After the gdb continue command is -+ given all pent up console output will be printed by gdb on the -+ host machine. Neither this option, nor KGDB require the -+ serial driver to be configured. -+ -+config KGDB_SYSRQ -+ bool "Turn on SysRq 'G' command to do a break?" -+ depends on KGDB -+ default y -+ help -+ This option includes an option in the SysRq code that allows -+ you to enter SysRq G which generates a breakpoint to the KGDB -+ stub. This will work if the keyboard is alive and can -+ interrupt the system. Because of constraints on when the -+ serial port interrupt can be enabled, this code may allow you -+ to interrupt the system before the serial port control C is -+ available. Just say yes here. -+ - config FRAME_POINTER - bool "Compile the kernel with frame pointers" -+ default KGDB - help - If you say Y here the resulting kernel image will be slightly larger - and slower, but it will give very useful debugging information. - If you don't debug the kernel, you can say N, but we may not be able - to solve problems without frame pointers. 
- -+config MAGIC_SYSRQ -+ bool -+ depends on KGDB_SYSRQ -+ default y -+ - config X86_EXTRA_IRQS - bool - depends on X86_LOCAL_APIC || X86_VOYAGER ---- linux-2.6.0/arch/i386/kernel/acpi/boot.c 2003-11-23 19:03:00.000000000 -0800 -+++ 25/arch/i386/kernel/acpi/boot.c 2003-12-28 23:21:57.000000000 -0800 -@@ -26,6 +26,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -40,9 +41,8 @@ - - #define PREFIX "ACPI: " - --extern int acpi_disabled; --extern int acpi_irq; --extern int acpi_ht; -+int acpi_noirq __initdata = 0; /* skip ACPI IRQ initialization */ -+int acpi_ht __initdata = 1; /* enable HT */ - - int acpi_lapic = 0; - int acpi_ioapic = 0; -@@ -249,29 +249,66 @@ acpi_parse_nmi_src ( - - #ifdef CONFIG_ACPI_BUS - /* -- * Set specified PIC IRQ to level triggered mode. -+ * "acpi_pic_sci=level" (current default) -+ * programs the PIC-mode SCI to Level Trigger. -+ * (NO-OP if the BIOS set Level Trigger already) -+ * -+ * If a PIC-mode SCI is not recogznied or gives spurious IRQ7's -+ * it may require Edge Trigger -- use "acpi_pic_sci=edge" -+ * (NO-OP if the BIOS set Edge Trigger already) - * - * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers - * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. - * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0) - * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) -- * -- * As the BIOS should have done this for us, -- * print a warning if the IRQ wasn't already set to level. 
- */ - --void acpi_pic_set_level_irq(unsigned int irq) -+static int __initdata acpi_pic_sci_trigger; /* 0: level, 1: edge */ -+ -+void __init -+acpi_pic_sci_set_trigger(unsigned int irq) - { - unsigned char mask = 1 << (irq & 7); - unsigned int port = 0x4d0 + (irq >> 3); - unsigned char val = inb(port); - -+ -+ printk(PREFIX "IRQ%d SCI:", irq); - if (!(val & mask)) { -- printk(KERN_WARNING PREFIX "IRQ %d was Edge Triggered, " -- "setting to Level Triggerd\n", irq); -- outb(val | mask, port); -+ printk(" Edge"); -+ -+ if (!acpi_pic_sci_trigger) { -+ printk(" set to Level"); -+ outb(val | mask, port); -+ } -+ } else { -+ printk(" Level"); -+ -+ if (acpi_pic_sci_trigger) { -+ printk(" set to Edge"); -+ outb(val | mask, port); -+ } -+ } -+ printk(" Trigger.\n"); -+} -+ -+int __init -+acpi_pic_sci_setup(char *str) -+{ -+ while (str && *str) { -+ if (strncmp(str, "level", 5) == 0) -+ acpi_pic_sci_trigger = 0; /* force level trigger */ -+ if (strncmp(str, "edge", 4) == 0) -+ acpi_pic_sci_trigger = 1; /* force edge trigger */ -+ str = strchr(str, ','); -+ if (str) -+ str += strspn(str, ", \t"); - } -+ return 1; - } -+ -+__setup("acpi_pic_sci=", acpi_pic_sci_setup); -+ - #endif /* CONFIG_ACPI_BUS */ - - -@@ -326,11 +363,48 @@ static int __init acpi_parse_hpet(unsign - } - #endif - -+/* detect the location of the ACPI PM Timer */ -+#ifdef CONFIG_X86_PM_TIMER -+extern u32 pmtmr_ioport; -+ -+static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) -+{ -+ struct fadt_descriptor_rev2 *fadt =0; -+ -+ fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys,size); -+ if(!fadt) { -+ printk(KERN_WARNING PREFIX "Unable to map FADT\n"); -+ return 0; -+ } -+ -+ if (fadt->revision >= FADT2_REVISION_ID) { -+ /* FADT rev. 2 */ -+ if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO) -+ return 0; -+ -+ pmtmr_ioport = fadt->xpm_tmr_blk.address; -+ } else { -+ /* FADT rev. 
1 */ -+ pmtmr_ioport = fadt->V1_pm_tmr_blk; -+ } -+ if (pmtmr_ioport) -+ printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport); -+ return 0; -+} -+#endif -+ -+ - unsigned long __init - acpi_find_rsdp (void) - { - unsigned long rsdp_phys = 0; - -+ if (efi_enabled) { -+ if (efi.acpi20) -+ return __pa(efi.acpi20); -+ else if (efi.acpi) -+ return __pa(efi.acpi); -+ } - /* - * Scan memory looking for the RSDP signature. First search EBDA (low - * memory) paragraphs and then search upper memory (E0000-FFFFF). -@@ -380,8 +454,10 @@ acpi_boot_init (void) - * Initialize the ACPI boot-time table parser. - */ - result = acpi_table_init(); -- if (result) -+ if (result) { -+ acpi_disabled = 1; - return result; -+ } - - result = acpi_blacklisted(); - if (result) { -@@ -462,7 +538,7 @@ acpi_boot_init (void) - * If MPS is present, it will handle them, - * otherwise the system will stay in PIC mode - */ -- if (acpi_disabled || !acpi_irq) { -+ if (acpi_disabled || acpi_noirq) { - return 1; - } - -@@ -504,6 +580,8 @@ acpi_boot_init (void) - - acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; - -+ acpi_irq_balance_set(NULL); -+ - acpi_ioapic = 1; - - #endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */ -@@ -519,5 +597,9 @@ acpi_boot_init (void) - acpi_table_parse(ACPI_HPET, acpi_parse_hpet); - #endif - -+#ifdef CONFIG_X86_PM_TIMER -+ acpi_table_parse(ACPI_FADT, acpi_parse_fadt); -+#endif -+ - return 0; - } ---- linux-2.6.0/arch/i386/kernel/asm-offsets.c 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/asm-offsets.c 2003-12-28 23:26:36.000000000 -0800 -@@ -4,9 +4,11 @@ - * to extract and format the required data. 
- */ - -+#include - #include - #include - #include "sigframe.h" -+#include - - #define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) -@@ -28,4 +30,17 @@ void foo(void) - - DEFINE(RT_SIGFRAME_sigcontext, - offsetof (struct rt_sigframe, uc.uc_mcontext)); -+ DEFINE(TI_task, offsetof (struct thread_info, task)); -+ DEFINE(TI_exec_domain, offsetof (struct thread_info, exec_domain)); -+ DEFINE(TI_flags, offsetof (struct thread_info, flags)); -+ DEFINE(TI_preempt_count, offsetof (struct thread_info, preempt_count)); -+ DEFINE(TI_addr_limit, offsetof (struct thread_info, addr_limit)); -+ DEFINE(TI_real_stack, offsetof (struct thread_info, real_stack)); -+ DEFINE(TI_virtual_stack, offsetof (struct thread_info, virtual_stack)); -+ DEFINE(TI_user_pgd, offsetof (struct thread_info, user_pgd)); -+ -+ DEFINE(FIX_ENTRY_TRAMPOLINE_0_addr, __fix_to_virt(FIX_ENTRY_TRAMPOLINE_0)); -+ DEFINE(FIX_VSYSCALL_addr, __fix_to_virt(FIX_VSYSCALL)); -+ DEFINE(PAGE_SIZE_asm, PAGE_SIZE); -+ DEFINE(task_thread_db7, offsetof (struct task_struct, thread.debugreg[7])); - } ---- linux-2.6.0/arch/i386/kernel/cpu/common.c 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/cpu/common.c 2003-12-28 23:26:36.000000000 -0800 -@@ -510,16 +510,20 @@ void __init cpu_init (void) - BUG(); - enter_lazy_tlb(&init_mm, current); - -- load_esp0(t, thread->esp0); -+ load_esp0(t, thread); - set_tss_desc(cpu,t); - cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff; - load_TR_desc(); -- load_LDT(&init_mm.context); -+ if (cpu) -+ load_LDT(&init_mm.context); - - /* Set up doublefault TSS pointer in the GDT */ - __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); - cpu_gdt_table[cpu][GDT_ENTRY_DOUBLEFAULT_TSS].b &= 0xfffffdff; - -+ if (cpu) -+ trap_init_virtual_GDT(); -+ - /* Clear %fs and %gs. 
*/ - asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); - ---- linux-2.6.0/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c 2003-09-08 13:58:55.000000000 -0700 -+++ 25/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c 2003-12-28 23:22:07.000000000 -0800 -@@ -73,6 +73,16 @@ static struct cpufreq_frequency_table op - { .frequency = CPUFREQ_TABLE_END } - }; - -+/* Ultra Low Voltage Intel Pentium M processor 1000MHz */ -+static struct cpufreq_frequency_table op_1000[] = -+ { -+ OP(600, 844), -+ OP(800, 972), -+ OP(900, 988), -+ OP(1000, 1004), -+ { .frequency = CPUFREQ_TABLE_END } -+ }; -+ - /* Low Voltage Intel Pentium M processor 1.10GHz */ - static struct cpufreq_frequency_table op_1100[] = - { -@@ -165,6 +175,7 @@ static struct cpufreq_frequency_table op - static const struct cpu_model models[] = - { - _CPU( 900, " 900"), -+ CPU(1000), - CPU(1100), - CPU(1200), - CPU(1300), ---- linux-2.6.0/arch/i386/kernel/cpu/intel.c 2003-11-23 19:03:00.000000000 -0800 -+++ 25/arch/i386/kernel/cpu/intel.c 2003-12-28 23:26:36.000000000 -0800 -@@ -1,5 +1,7 @@ -+#include - #include - #include -+ - #include - #include - #include -@@ -8,10 +10,15 @@ - #include - #include - #include -+#include - - #include "cpu.h" - --extern int trap_init_f00f_bug(void); -+#ifdef CONFIG_X86_LOCAL_APIC -+#include -+#include -+#include -+#endif - - #ifdef CONFIG_X86_INTEL_USERCOPY - /* -@@ -157,7 +164,7 @@ static void __init init_intel(struct cpu - - c->f00f_bug = 1; - if ( !f00f_workaround_enabled ) { -- trap_init_f00f_bug(); -+ trap_init_virtual_IDT(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } -@@ -240,6 +247,12 @@ static void __init init_intel(struct cpu - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) - clear_bit(X86_FEATURE_SEP, c->x86_capability); -+ /* -+ * FIXME: SEP is disabled for 4G/4G for now: -+ */ -+#ifdef 
CONFIG_X86_HIGH_ENTRY -+ clear_bit(X86_FEATURE_SEP, c->x86_capability); -+#endif - - /* Names for the Pentium II/Celeron processors - detectable only by also checking the cache size. -@@ -277,6 +290,7 @@ static void __init init_intel(struct cpu - extern int phys_proc_id[NR_CPUS]; - - u32 eax, ebx, ecx, edx; -+ int index_lsb, index_msb, tmp; - int cpu = smp_processor_id(); - - cpuid(1, &eax, &ebx, &ecx, &edx); -@@ -285,6 +299,8 @@ static void __init init_intel(struct cpu - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1 ) { -+ index_lsb = 0; -+ index_msb = 31; - /* - * At this point we only support two siblings per - * processor package. -@@ -295,13 +311,19 @@ static void __init init_intel(struct cpu - smp_num_siblings = 1; - goto too_many_siblings; - } -- /* cpuid returns the value latched in the HW at reset, -- * not the APIC ID register's value. For any box -- * whose BIOS changes APIC IDs, like clustered APIC -- * systems, we must use hard_smp_processor_id. -- * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. 
-- */ -- phys_proc_id[cpu] = hard_smp_processor_id() & ~(smp_num_siblings - 1); -+ tmp = smp_num_siblings; -+ while ((tmp & 1) == 0) { -+ tmp >>=1 ; -+ index_lsb++; -+ } -+ tmp = smp_num_siblings; -+ while ((tmp & 0x80000000 ) == 0) { -+ tmp <<=1 ; -+ index_msb--; -+ } -+ if (index_lsb != index_msb ) -+ index_msb++; -+ phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); - - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - phys_proc_id[cpu]); ---- linux-2.6.0/arch/i386/kernel/dmi_scan.c 2003-10-08 15:07:08.000000000 -0700 -+++ 25/arch/i386/kernel/dmi_scan.c 2003-12-28 23:21:33.000000000 -0800 -@@ -6,6 +6,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -16,6 +17,7 @@ EXPORT_SYMBOL(dmi_broken); - - int is_sony_vaio_laptop; - int is_unsafe_smbus; -+int es7000_plat = 0; - - struct dmi_header - { -@@ -504,6 +506,7 @@ static __init int print_if_true(struct d - } - - -+#ifdef CONFIG_ACPI_BOOT - extern int acpi_disabled, acpi_force; - - static __init __attribute__((unused)) int acpi_disable(struct dmi_blacklist *d) -@@ -518,8 +521,6 @@ static __init __attribute__((unused)) in - return 0; - } - -- --#ifdef CONFIG_ACPI_BOOT - extern int acpi_ht; - - /* -@@ -542,10 +543,8 @@ static __init __attribute__((unused)) in - #ifdef CONFIG_ACPI_PCI - static __init int disable_acpi_pci(struct dmi_blacklist *d) - { -- extern __init void pci_disable_acpi(void) ; -- - printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", d->ident); -- pci_disable_acpi(); -+ acpi_noirq_set(); - return 0; - } - #endif -@@ -1011,6 +1010,7 @@ static __init void dmi_check_blacklist(v - printk(KERN_NOTICE "ACPI disabled because your bios is from %s and too old\n", s); - printk(KERN_NOTICE "You can enable it with acpi=force\n"); - acpi_disabled = 1; -+ acpi_ht = 0; - } - } - } ---- linux-2.6.0/arch/i386/kernel/doublefault.c 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/doublefault.c 2003-12-28 23:26:36.000000000 -0800 -@@ -7,12 +7,13 @@ 
- #include - #include - #include -+#include - - #define DOUBLEFAULT_STACKSIZE (1024) - static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; - #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) - --#define ptr_ok(x) ((x) > 0xc0000000 && (x) < 0xc1000000) -+#define ptr_ok(x) (((x) > __PAGE_OFFSET && (x) < (__PAGE_OFFSET + 0x01000000)) || ((x) >= FIXADDR_START)) - - static void doublefault_fn(void) - { -@@ -38,8 +39,8 @@ static void doublefault_fn(void) - - printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", - t->eax, t->ebx, t->ecx, t->edx); -- printk("esi = %08lx, edi = %08lx\n", -- t->esi, t->edi); -+ printk("esi = %08lx, edi = %08lx, ebp = %08lx\n", -+ t->esi, t->edi, t->ebp); - } - } - ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25/arch/i386/kernel/efi.c 2003-12-28 23:21:45.000000000 -0800 -@@ -0,0 +1,645 @@ -+/* -+ * Extensible Firmware Interface -+ * -+ * Based on Extensible Firmware Interface Specification version 1.0 -+ * -+ * Copyright (C) 1999 VA Linux Systems -+ * Copyright (C) 1999 Walt Drummond -+ * Copyright (C) 1999-2002 Hewlett-Packard Co. -+ * David Mosberger-Tang -+ * Stephane Eranian -+ * -+ * All EFI Runtime Services are not implemented yet as EFI only -+ * supports physical mode addressing on SoftSDV. This is to be fixed -+ * in a future version. --drummond 1999-07-20 -+ * -+ * Implemented EFI runtime services and virtual mode calls. --davidm -+ * -+ * Goutham Rao: -+ * Skip non-WB memory and ignore empty memory ranges. 
-+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define EFI_DEBUG 0 -+#define PFX "EFI: " -+ -+extern efi_status_t asmlinkage efi_call_phys(void *, ...); -+ -+struct efi efi; -+struct efi efi_phys __initdata; -+struct efi_memory_map memmap __initdata; -+ -+/* -+ * We require an early boot_ioremap mapping mechanism initially -+ */ -+extern void * boot_ioremap(unsigned long, unsigned long); -+ -+/* -+ * efi_dir is allocated here, but the directory isn't created -+ * here, as proc_mkdir() doesn't work this early in the bootup -+ * process. Therefore, each module, like efivars, must test for -+ * if (!efi_dir) efi_dir = proc_mkdir("efi", NULL); -+ * prior to creating their own entries under /proc/efi. -+ */ -+#ifdef CONFIG_PROC_FS -+struct proc_dir_entry *efi_dir; -+#endif -+ -+ -+/* -+ * To make EFI call EFI runtime service in physical addressing mode we need -+ * prelog/epilog before/after the invocation to disable interrupt, to -+ * claim EFI runtime service handler exclusively and to duplicate a memory in -+ * low memory space say 0 - 3G. -+ */ -+ -+static unsigned long efi_rt_eflags; -+static spinlock_t efi_rt_lock = SPIN_LOCK_UNLOCKED; -+static pgd_t efi_bak_pg_dir_pointer[2]; -+ -+static void efi_call_phys_prelog(void) -+{ -+ unsigned long cr4; -+ unsigned long temp; -+ -+ spin_lock(&efi_rt_lock); -+ local_irq_save(efi_rt_eflags); -+ -+ /* -+ * If I don't have PSE, I should just duplicate two entries in page -+ * directory. If I have PSE, I just need to duplicate one entry in -+ * page directory. 
-+ */ -+ __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); -+ -+ if (cr4 & X86_CR4_PSE) { -+ efi_bak_pg_dir_pointer[0].pgd = -+ swapper_pg_dir[pgd_index(0)].pgd; -+ swapper_pg_dir[0].pgd = -+ swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; -+ } else { -+ efi_bak_pg_dir_pointer[0].pgd = -+ swapper_pg_dir[pgd_index(0)].pgd; -+ efi_bak_pg_dir_pointer[1].pgd = -+ swapper_pg_dir[pgd_index(0x400000)].pgd; -+ swapper_pg_dir[pgd_index(0)].pgd = -+ swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; -+ temp = PAGE_OFFSET + 0x400000; -+ swapper_pg_dir[pgd_index(0x400000)].pgd = -+ swapper_pg_dir[pgd_index(temp)].pgd; -+ } -+ -+ /* -+ * After the lock is released, the original page table is restored. -+ */ -+ local_flush_tlb(); -+ -+ cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address); -+ __asm__ __volatile__("lgdt %0":"=m" -+ (*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0]))); -+} -+ -+static void efi_call_phys_epilog(void) -+{ -+ unsigned long cr4; -+ -+ cpu_gdt_descr[0].address = -+ (unsigned long) __va(cpu_gdt_descr[0].address); -+ __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr)); -+ __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); -+ -+ if (cr4 & X86_CR4_PSE) { -+ swapper_pg_dir[pgd_index(0)].pgd = -+ efi_bak_pg_dir_pointer[0].pgd; -+ } else { -+ swapper_pg_dir[pgd_index(0)].pgd = -+ efi_bak_pg_dir_pointer[0].pgd; -+ swapper_pg_dir[pgd_index(0x400000)].pgd = -+ efi_bak_pg_dir_pointer[1].pgd; -+ } -+ -+ /* -+ * After the lock is released, the original page table is restored. 
-+ */ -+ local_flush_tlb(); -+ -+ local_irq_restore(efi_rt_eflags); -+ spin_unlock(&efi_rt_lock); -+} -+ -+static efi_status_t -+phys_efi_set_virtual_address_map(unsigned long memory_map_size, -+ unsigned long descriptor_size, -+ u32 descriptor_version, -+ efi_memory_desc_t *virtual_map) -+{ -+ efi_status_t status; -+ -+ efi_call_phys_prelog(); -+ status = efi_call_phys(efi_phys.set_virtual_address_map, -+ memory_map_size, descriptor_size, -+ descriptor_version, virtual_map); -+ efi_call_phys_epilog(); -+ return status; -+} -+ -+efi_status_t -+phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) -+{ -+ efi_status_t status; -+ -+ efi_call_phys_prelog(); -+ status = efi_call_phys(efi_phys.get_time, tm, tc); -+ efi_call_phys_epilog(); -+ return status; -+} -+ -+int inline efi_set_rtc_mmss(unsigned long nowtime) -+{ -+ int real_seconds, real_minutes; -+ efi_status_t status; -+ efi_time_t eft; -+ efi_time_cap_t cap; -+ -+ spin_lock(&efi_rt_lock); -+ status = efi.get_time(&eft, &cap); -+ spin_unlock(&efi_rt_lock); -+ if (status != EFI_SUCCESS) -+ panic("Ooops, efitime: can't read time!\n"); -+ real_seconds = nowtime % 60; -+ real_minutes = nowtime / 60; -+ -+ if (((abs(real_minutes - eft.minute) + 15)/30) & 1) -+ real_minutes += 30; -+ real_minutes %= 60; -+ -+ eft.minute = real_minutes; -+ eft.second = real_seconds; -+ -+ if (status != EFI_SUCCESS) { -+ printk("Ooops: efitime: can't read time!\n"); -+ return -1; -+ } -+ return 0; -+} -+/* -+ * This should only be used during kernel init and before runtime -+ * services have been remapped, therefore, we'll need to call in physical -+ * mode. Note, this call isn't used later, so mark it __init. 
-+ */ -+unsigned long inline __init efi_get_time(void) -+{ -+ efi_status_t status; -+ efi_time_t eft; -+ efi_time_cap_t cap; -+ -+ status = phys_efi_get_time(&eft, &cap); -+ if (status != EFI_SUCCESS) -+ printk("Oops: efitime: can't read time status: 0x%lx\n",status); -+ -+ return mktime(eft.year, eft.month, eft.day, eft.hour, -+ eft.minute, eft.second); -+} -+ -+int is_available_memory(efi_memory_desc_t * md) -+{ -+ if (!(md->attribute & EFI_MEMORY_WB)) -+ return 0; -+ -+ switch (md->type) { -+ case EFI_LOADER_CODE: -+ case EFI_LOADER_DATA: -+ case EFI_BOOT_SERVICES_CODE: -+ case EFI_BOOT_SERVICES_DATA: -+ case EFI_CONVENTIONAL_MEMORY: -+ return 1; -+ } -+ return 0; -+} -+ -+/* -+ * We need to map the EFI memory map again after paging_init(). -+ */ -+void __init efi_map_memmap(void) -+{ -+ memmap.map = NULL; -+ -+ memmap.map = (efi_memory_desc_t *) -+ bt_ioremap((unsigned long) memmap.phys_map, -+ (memmap.nr_map * sizeof(efi_memory_desc_t))); -+ -+ if (memmap.map == NULL) -+ printk(KERN_ERR PFX "Could not remap the EFI memmap!\n"); -+} -+ -+void __init print_efi_memmap(void) -+{ -+ efi_memory_desc_t *md; -+ int i; -+ -+ for (i = 0; i < memmap.nr_map; i++) { -+ md = &memmap.map[i]; -+ printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, " -+ "range=[0x%016llx-0x%016llx) (%lluMB)\n", -+ i, md->type, md->attribute, md->phys_addr, -+ md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), -+ (md->num_pages >> (20 - EFI_PAGE_SHIFT))); -+ } -+} -+ -+/* -+ * Walks the EFI memory map and calls CALLBACK once for each EFI -+ * memory descriptor that has memory that is available for kernel use. 
-+ */ -+void efi_memmap_walk(efi_freemem_callback_t callback, void *arg) -+{ -+ int prev_valid = 0; -+ struct range { -+ unsigned long start; -+ unsigned long end; -+ } prev, curr; -+ efi_memory_desc_t *md; -+ unsigned long start, end; -+ int i; -+ -+ for (i = 0; i < memmap.nr_map; i++) { -+ md = &memmap.map[i]; -+ -+ if ((md->num_pages == 0) || (!is_available_memory(md))) -+ continue; -+ -+ curr.start = md->phys_addr; -+ curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT); -+ -+ if (!prev_valid) { -+ prev = curr; -+ prev_valid = 1; -+ } else { -+ if (curr.start < prev.start) -+ printk(KERN_INFO PFX "Unordered memory map\n"); -+ if (prev.end == curr.start) -+ prev.end = curr.end; -+ else { -+ start = -+ (unsigned long) (PAGE_ALIGN(prev.start)); -+ end = (unsigned long) (prev.end & PAGE_MASK); -+ if ((end > start) -+ && (*callback) (start, end, arg) < 0) -+ return; -+ prev = curr; -+ } -+ } -+ } -+ if (prev_valid) { -+ start = (unsigned long) PAGE_ALIGN(prev.start); -+ end = (unsigned long) (prev.end & PAGE_MASK); -+ if (end > start) -+ (*callback) (start, end, arg); -+ } -+} -+ -+void __init efi_init(void) -+{ -+ efi_config_table_t *config_tables; -+ efi_runtime_services_t *runtime; -+ efi_char16_t *c16; -+ char vendor[100] = "unknown"; -+ unsigned long num_config_tables; -+ int i = 0; -+ -+ memset(&efi, 0, sizeof(efi) ); -+ memset(&efi_phys, 0, sizeof(efi_phys)); -+ -+ efi_phys.systab = EFI_SYSTAB; -+ memmap.phys_map = EFI_MEMMAP; -+ memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE; -+ memmap.desc_version = EFI_MEMDESC_VERSION; -+ -+ efi.systab = (efi_system_table_t *) -+ boot_ioremap((unsigned long) efi_phys.systab, -+ sizeof(efi_system_table_t)); -+ /* -+ * Verify the EFI Table -+ */ -+ if (efi.systab == NULL) -+ printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n"); -+ if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) -+ printk(KERN_ERR PFX "Woah! 
EFI system table signature incorrect\n"); -+ if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0) -+ printk(KERN_ERR PFX -+ "Warning: EFI system table major version mismatch: " -+ "got %d.%02d, expected %d.%02d\n", -+ efi.systab->hdr.revision >> 16, -+ efi.systab->hdr.revision & 0xffff, -+ EFI_SYSTEM_TABLE_REVISION >> 16, -+ EFI_SYSTEM_TABLE_REVISION & 0xffff); -+ /* -+ * Grab some details from the system table -+ */ -+ num_config_tables = efi.systab->nr_tables; -+ config_tables = (efi_config_table_t *)efi.systab->tables; -+ runtime = efi.systab->runtime; -+ -+ /* -+ * Show what we know for posterity -+ */ -+ c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2); -+ if (c16) { -+ for (i = 0; i < sizeof(vendor) && *c16; ++i) -+ vendor[i] = *c16++; -+ vendor[i] = '\0'; -+ } else -+ printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); -+ -+ printk(KERN_INFO PFX "EFI v%u.%.02u by %s \n", -+ efi.systab->hdr.revision >> 16, -+ efi.systab->hdr.revision & 0xffff, vendor); -+ -+ /* -+ * Let's see what config tables the firmware passed to us. 
-+ */ -+ config_tables = (efi_config_table_t *) -+ boot_ioremap((unsigned long) config_tables, -+ num_config_tables * sizeof(efi_config_table_t)); -+ -+ if (config_tables == NULL) -+ printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n"); -+ -+ for (i = 0; i < num_config_tables; i++) { -+ if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { -+ efi.mps = (void *)config_tables[i].table; -+ printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table); -+ } else -+ if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { -+ efi.acpi20 = __va(config_tables[i].table); -+ printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table); -+ } else -+ if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { -+ efi.acpi = __va(config_tables[i].table); -+ printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table); -+ } else -+ if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { -+ efi.smbios = (void *) config_tables[i].table; -+ printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table); -+ } else -+ if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { -+ efi.hcdp = (void *)config_tables[i].table; -+ printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table); -+ } else -+ if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) { -+ efi.uga = (void *)config_tables[i].table; -+ printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table); -+ } -+ } -+ printk("\n"); -+ -+ /* -+ * Check out the runtime services table. We need to map -+ * the runtime services table so that we can grab the physical -+ * address of several of the EFI runtime functions, needed to -+ * set the firmware into virtual mode. -+ */ -+ -+ runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long) -+ runtime, -+ sizeof(efi_runtime_services_t)); -+ if (runtime != NULL) { -+ /* -+ * We will only need *early* access to the following -+ * two EFI runtime services before set_virtual_address_map -+ * is invoked. 
-+ */ -+ efi_phys.get_time = (efi_get_time_t *) runtime->get_time; -+ efi_phys.set_virtual_address_map = -+ (efi_set_virtual_address_map_t *) -+ runtime->set_virtual_address_map; -+ } else -+ printk(KERN_ERR PFX "Could not map the runtime service table!\n"); -+ -+ /* Map the EFI memory map for use until paging_init() */ -+ -+ memmap.map = (efi_memory_desc_t *) -+ boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); -+ -+ if (memmap.map == NULL) -+ printk(KERN_ERR PFX "Could not map the EFI memory map!\n"); -+ -+ if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) { -+ printk(KERN_WARNING PFX "Warning! Kernel-defined memdesc doesn't " -+ "match the one from EFI!\n"); -+ } -+#if EFI_DEBUG -+ print_efi_memmap(); -+#endif -+} -+ -+/* -+ * This function will switch the EFI runtime services to virtual mode. -+ * Essentially, look through the EFI memmap and map every region that -+ * has the runtime attribute bit set in its memory descriptor and update -+ * that memory descriptor with the virtual address obtained from ioremap(). -+ * This enables the runtime services to be called without having to -+ * thunk back into physical mode for every invocation. 
-+ */ -+ -+void __init efi_enter_virtual_mode(void) -+{ -+ efi_memory_desc_t *md; -+ efi_status_t status; -+ int i; -+ -+ efi.systab = NULL; -+ -+ for (i = 0; i < memmap.nr_map; i++) { -+ md = &memmap.map[i]; -+ -+ if (md->attribute & EFI_MEMORY_RUNTIME) { -+ md->virt_addr = -+ (unsigned long)ioremap(md->phys_addr, -+ md->num_pages << EFI_PAGE_SHIFT); -+ if (!(unsigned long)md->virt_addr) { -+ printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", -+ (unsigned long)md->phys_addr); -+ } -+ -+ if (((unsigned long)md->phys_addr <= -+ (unsigned long)efi_phys.systab) && -+ ((unsigned long)efi_phys.systab < -+ md->phys_addr + -+ ((unsigned long)md->num_pages << -+ EFI_PAGE_SHIFT))) { -+ unsigned long addr; -+ -+ addr = md->virt_addr - md->phys_addr + -+ (unsigned long)efi_phys.systab; -+ efi.systab = (efi_system_table_t *)addr; -+ } -+ } -+ } -+ -+ if (!efi.systab) -+ BUG(); -+ -+ status = phys_efi_set_virtual_address_map( -+ sizeof(efi_memory_desc_t) * memmap.nr_map, -+ sizeof(efi_memory_desc_t), -+ memmap.desc_version, -+ memmap.phys_map); -+ -+ if (status != EFI_SUCCESS) { -+ printk (KERN_ALERT "You are screwed! " -+ "Unable to switch EFI into virtual mode " -+ "(status=%lx)\n", status); -+ panic("EFI call to SetVirtualAddressMap() failed!"); -+ } -+ -+ /* -+ * Now that EFI is in virtual mode, update the function -+ * pointers in the runtime service table to the new virtual addresses. 
-+ */ -+ -+ efi.get_time = (efi_get_time_t *) efi.systab->runtime->get_time; -+ efi.set_time = (efi_set_time_t *) efi.systab->runtime->set_time; -+ efi.get_wakeup_time = (efi_get_wakeup_time_t *) -+ efi.systab->runtime->get_wakeup_time; -+ efi.set_wakeup_time = (efi_set_wakeup_time_t *) -+ efi.systab->runtime->set_wakeup_time; -+ efi.get_variable = (efi_get_variable_t *) -+ efi.systab->runtime->get_variable; -+ efi.get_next_variable = (efi_get_next_variable_t *) -+ efi.systab->runtime->get_next_variable; -+ efi.set_variable = (efi_set_variable_t *) -+ efi.systab->runtime->set_variable; -+ efi.get_next_high_mono_count = (efi_get_next_high_mono_count_t *) -+ efi.systab->runtime->get_next_high_mono_count; -+ efi.reset_system = (efi_reset_system_t *) -+ efi.systab->runtime->reset_system; -+} -+ -+void __init -+efi_initialize_iomem_resources(struct resource *code_resource, -+ struct resource *data_resource) -+{ -+ struct resource *res; -+ efi_memory_desc_t *md; -+ int i; -+ -+ for (i = 0; i < memmap.nr_map; i++) { -+ md = &memmap.map[i]; -+ -+ if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > -+ 0x100000000ULL) -+ continue; -+ res = alloc_bootmem_low(sizeof(struct resource)); -+ switch (md->type) { -+ case EFI_RESERVED_TYPE: -+ res->name = "Reserved Memory"; -+ break; -+ case EFI_LOADER_CODE: -+ res->name = "Loader Code"; -+ break; -+ case EFI_LOADER_DATA: -+ res->name = "Loader Data"; -+ break; -+ case EFI_BOOT_SERVICES_DATA: -+ res->name = "BootServices Data"; -+ break; -+ case EFI_BOOT_SERVICES_CODE: -+ res->name = "BootServices Code"; -+ break; -+ case EFI_RUNTIME_SERVICES_CODE: -+ res->name = "Runtime Service Code"; -+ break; -+ case EFI_RUNTIME_SERVICES_DATA: -+ res->name = "Runtime Service Data"; -+ break; -+ case EFI_CONVENTIONAL_MEMORY: -+ res->name = "Conventional Memory"; -+ break; -+ case EFI_UNUSABLE_MEMORY: -+ res->name = "Unusable Memory"; -+ break; -+ case EFI_ACPI_RECLAIM_MEMORY: -+ res->name = "ACPI Reclaim"; -+ break; -+ case 
EFI_ACPI_MEMORY_NVS: -+ res->name = "ACPI NVS"; -+ break; -+ case EFI_MEMORY_MAPPED_IO: -+ res->name = "Memory Mapped IO"; -+ break; -+ case EFI_MEMORY_MAPPED_IO_PORT_SPACE: -+ res->name = "Memory Mapped IO Port Space"; -+ break; -+ default: -+ res->name = "Reserved"; -+ break; -+ } -+ res->start = md->phys_addr; -+ res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1); -+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; -+ if (request_resource(&iomem_resource, res) < 0) -+ printk(KERN_ERR PFX "Failed to allocate res %s : 0x%lx-0x%lx\n", -+ res->name, res->start, res->end); -+ /* -+ * We don't know which region contains kernel data so we try -+ * it repeatedly and let the resource manager test it. -+ */ -+ if (md->type == EFI_CONVENTIONAL_MEMORY) { -+ request_resource(res, code_resource); -+ request_resource(res, data_resource); -+ } -+ } -+} -+ -+/* -+ * Convenience functions to obtain memory types and attributes -+ */ -+ -+u32 efi_mem_type(unsigned long phys_addr) -+{ -+ efi_memory_desc_t *md; -+ int i; -+ -+ for (i = 0; i < memmap.nr_map; i++) { -+ md = &memmap.map[i]; -+ if ((md->phys_addr <= phys_addr) && (phys_addr < -+ (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) -+ return md->type; -+ } -+ return 0; -+} -+ -+u64 efi_mem_attributes(unsigned long phys_addr) -+{ -+ efi_memory_desc_t *md; -+ int i; -+ -+ for (i = 0; i < memmap.nr_map; i++) { -+ md = &memmap.map[i]; -+ if ((md->phys_addr <= phys_addr) && (phys_addr < -+ (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) -+ return md->attribute; -+ } -+ return 0; -+} ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25/arch/i386/kernel/efi_stub.S 2003-12-28 23:21:45.000000000 -0800 -@@ -0,0 +1,124 @@ -+/* -+ * EFI call stub for IA32. -+ * -+ * This stub allows us to make EFI calls in physical mode with interrupts -+ * turned off. -+ */ -+ -+#include -+#include -+#include -+#include -+ -+/* -+ * efi_call_phys(void *, ...) is a function with variable parameters. 
-+ * All the callers of this function assure that all the parameters are 4-bytes. -+ */ -+ -+/* -+ * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. -+ * So we'd better save all of them at the beginning of this function and restore -+ * at the end no matter how many we use, because we can not assure EFI runtime -+ * service functions will comply with gcc calling convention, too. -+ */ -+ -+.text -+ENTRY(efi_call_phys) -+ /* -+ * 0. The function can only be called in Linux kernel. So CS has been -+ * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found -+ * the values of these registers are the same. And, the corresponding -+ * GDT entries are identical. So I will do nothing about segment reg -+ * and GDT, but change GDT base register in prelog and epilog. -+ */ -+ -+ /* -+ * 1. Now I am running with EIP = + PAGE_OFFSET. -+ * But to make it smoothly switch from virtual mode to flat mode. -+ * The mapping of lower virtual memory has been created in prelog and -+ * epilog. -+ */ -+ movl $1f, %edx -+ subl $__PAGE_OFFSET, %edx -+ jmp *%edx -+1: -+ -+ /* -+ * 2. Now on the top of stack is the return -+ * address in the caller of efi_call_phys(), then parameter 1, -+ * parameter 2, ..., param n. To make things easy, we save the return -+ * address of efi_call_phys in a global variable. -+ */ -+ popl %edx -+ movl %edx, saved_return_addr -+ /* get the function pointer into ECX*/ -+ popl %ecx -+ movl %ecx, efi_rt_function_ptr -+ movl $2f, %edx -+ subl $__PAGE_OFFSET, %edx -+ pushl %edx -+ -+ /* -+ * 3. Clear PG bit in %CR0. -+ */ -+ movl %cr0, %edx -+ andl $0x7fffffff, %edx -+ movl %edx, %cr0 -+ jmp 1f -+1: -+ -+ /* -+ * 4. Adjust stack pointer. -+ */ -+ subl $__PAGE_OFFSET, %esp -+ -+ /* -+ * 5. Call the physical function. -+ */ -+ jmp *%ecx -+ -+2: -+ /* -+ * 6. After EFI runtime service returns, control will return to -+ * following instruction. We'd better readjust stack pointer first. 
-+ */ -+ addl $__PAGE_OFFSET, %esp -+ -+ /* -+ * 7. Restore PG bit -+ */ -+ movl %cr0, %edx -+ orl $0x80000000, %edx -+ movl %edx, %cr0 -+ jmp 1f -+1: -+ /* -+ * 8. Now restore the virtual mode from flat mode by -+ * adding EIP with PAGE_OFFSET. -+ */ -+ movl $1f, %edx -+ jmp *%edx -+1: -+ -+ /* -+ * 9. Balance the stack. And because EAX contain the return value, -+ * we'd better not clobber it. -+ */ -+ leal efi_rt_function_ptr, %edx -+ movl (%edx), %ecx -+ pushl %ecx -+ -+ /* -+ * 10. Push the saved return address onto the stack and return. -+ */ -+ leal saved_return_addr, %edx -+ movl (%edx), %ecx -+ pushl %ecx -+ ret -+.previous -+ -+.data -+saved_return_addr: -+ .long 0 -+efi_rt_function_ptr: -+ .long 0 ---- linux-2.6.0/arch/i386/kernel/entry.S 2003-11-23 19:03:00.000000000 -0800 -+++ 25/arch/i386/kernel/entry.S 2003-12-28 23:26:36.000000000 -0800 -@@ -43,11 +43,25 @@ - #include - #include - #include -+#include - #include - #include -+#include - #include - #include - #include "irq_vectors.h" -+ /* We do not recover from a stack overflow, but at least -+ * we know it happened and should be able to track it down. -+ */ -+#ifdef CONFIG_STACK_OVERFLOW_TEST -+#define STACK_OVERFLOW_TEST \ -+ testl $7680,%esp; \ -+ jnz 10f; \ -+ call stack_overflow; \ -+10: -+#else -+#define STACK_OVERFLOW_TEST -+#endif - - #define nr_syscalls ((syscall_table_size)/4) - -@@ -87,7 +101,102 @@ TSS_ESP0_OFFSET = (4 - 0x200) - #define resume_kernel restore_all - #endif - --#define SAVE_ALL \ -+#ifdef CONFIG_X86_HIGH_ENTRY -+ -+#ifdef CONFIG_X86_SWITCH_PAGETABLES -+ -+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) -+/* -+ * If task is preempted in __SWITCH_KERNELSPACE, and moved to another cpu, -+ * __switch_to repoints %esp to the appropriate virtual stack; but %ebp is -+ * left stale, so we must check whether to repeat the real stack calculation. 
-+ */ -+#define repeat_if_esp_changed \ -+ xorl %esp, %ebp; \ -+ testl $0xffffe000, %ebp; \ -+ jnz 0b -+#else -+#define repeat_if_esp_changed -+#endif -+ -+/* clobbers ebx, edx and ebp */ -+ -+#define __SWITCH_KERNELSPACE \ -+ cmpl $0xff000000, %esp; \ -+ jb 1f; \ -+ \ -+ /* \ -+ * switch pagetables and load the real stack, \ -+ * keep the stack offset: \ -+ */ \ -+ \ -+ movl $swapper_pg_dir-__PAGE_OFFSET, %edx; \ -+ \ -+ /* GET_THREAD_INFO(%ebp) intermixed */ \ -+0: \ -+ movl %esp, %ebp; \ -+ movl %esp, %ebx; \ -+ andl $0xffffe000, %ebp; \ -+ andl $0x00001fff, %ebx; \ -+ orl TI_real_stack(%ebp), %ebx; \ -+ repeat_if_esp_changed; \ -+ \ -+ movl %edx, %cr3; \ -+ movl %ebx, %esp; \ -+1: -+ -+#endif -+ -+ -+#define __SWITCH_USERSPACE \ -+ /* interrupted any of the user return paths? */ \ -+ \ -+ movl EIP(%esp), %eax; \ -+ \ -+ cmpl $int80_ret_start_marker, %eax; \ -+ jb 33f; /* nope - continue with sysexit check */\ -+ cmpl $int80_ret_end_marker, %eax; \ -+ jb 22f; /* yes - switch to virtual stack */ \ -+33: \ -+ cmpl $sysexit_ret_start_marker, %eax; \ -+ jb 44f; /* nope - continue with user check */ \ -+ cmpl $sysexit_ret_end_marker, %eax; \ -+ jb 22f; /* yes - switch to virtual stack */ \ -+ /* return to userspace? */ \ -+44: \ -+ movl EFLAGS(%esp),%ecx; \ -+ movb CS(%esp),%cl; \ -+ testl $(VM_MASK | 3),%ecx; \ -+ jz 2f; \ -+22: \ -+ /* \ -+ * switch to the virtual stack, then switch to \ -+ * the userspace pagetables. 
\ -+ */ \ -+ \ -+ GET_THREAD_INFO(%ebp); \ -+ movl TI_virtual_stack(%ebp), %edx; \ -+ movl TI_user_pgd(%ebp), %ecx; \ -+ \ -+ movl %esp, %ebx; \ -+ andl $0x1fff, %ebx; \ -+ orl %ebx, %edx; \ -+int80_ret_start_marker: \ -+ movl %edx, %esp; \ -+ movl %ecx, %cr3; \ -+ \ -+ __RESTORE_ALL; \ -+int80_ret_end_marker: \ -+2: -+ -+#else /* !CONFIG_X86_HIGH_ENTRY */ -+ -+#define __SWITCH_KERNELSPACE -+#define __SWITCH_USERSPACE -+ -+#endif -+ -+#define __SAVE_ALL \ - cld; \ - pushl %es; \ - pushl %ds; \ -@@ -102,7 +211,7 @@ TSS_ESP0_OFFSET = (4 - 0x200) - movl %edx, %ds; \ - movl %edx, %es; - --#define RESTORE_INT_REGS \ -+#define __RESTORE_INT_REGS \ - popl %ebx; \ - popl %ecx; \ - popl %edx; \ -@@ -111,29 +220,28 @@ TSS_ESP0_OFFSET = (4 - 0x200) - popl %ebp; \ - popl %eax - --#define RESTORE_REGS \ -- RESTORE_INT_REGS; \ --1: popl %ds; \ --2: popl %es; \ -+#define __RESTORE_REGS \ -+ __RESTORE_INT_REGS; \ -+111: popl %ds; \ -+222: popl %es; \ - .section .fixup,"ax"; \ --3: movl $0,(%esp); \ -- jmp 1b; \ --4: movl $0,(%esp); \ -- jmp 2b; \ -+444: movl $0,(%esp); \ -+ jmp 111b; \ -+555: movl $0,(%esp); \ -+ jmp 222b; \ - .previous; \ - .section __ex_table,"a";\ - .align 4; \ -- .long 1b,3b; \ -- .long 2b,4b; \ -+ .long 111b,444b;\ -+ .long 222b,555b;\ - .previous - -- --#define RESTORE_ALL \ -- RESTORE_REGS \ -+#define __RESTORE_ALL \ -+ __RESTORE_REGS \ - addl $4, %esp; \ --1: iret; \ -+333: iret; \ - .section .fixup,"ax"; \ --2: sti; \ -+666: sti; \ - movl $(__USER_DS), %edx; \ - movl %edx, %ds; \ - movl %edx, %es; \ -@@ -142,10 +250,19 @@ TSS_ESP0_OFFSET = (4 - 0x200) - .previous; \ - .section __ex_table,"a";\ - .align 4; \ -- .long 1b,2b; \ -+ .long 333b,666b;\ - .previous - -+#define SAVE_ALL \ -+ __SAVE_ALL; \ -+ __SWITCH_KERNELSPACE; \ -+ STACK_OVERFLOW_TEST; -+ -+#define RESTORE_ALL \ -+ __SWITCH_USERSPACE; \ -+ __RESTORE_ALL; - -+.section .entry.text,"ax" - - ENTRY(lcall7) - pushfl # We get a different stack layout with call -@@ -163,7 +280,7 @@ do_lcall: - movl 
%edx,EIP(%ebp) # Now we move them to their "normal" places - movl %ecx,CS(%ebp) # - andl $-8192, %ebp # GET_THREAD_INFO -- movl TI_EXEC_DOMAIN(%ebp), %edx # Get the execution domain -+ movl TI_exec_domain(%ebp), %edx # Get the execution domain - call *4(%edx) # Call the lcall7 handler for the domain - addl $4, %esp - popl %eax -@@ -208,7 +325,7 @@ ENTRY(resume_userspace) - cli # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret -- movl TI_FLAGS(%ebp), %ecx -+ movl TI_flags(%ebp), %ecx - andl $_TIF_WORK_MASK, %ecx # is there any work to be done on - # int/exception return? - jne work_pending -@@ -216,18 +333,18 @@ ENTRY(resume_userspace) - - #ifdef CONFIG_PREEMPT - ENTRY(resume_kernel) -- cmpl $0,TI_PRE_COUNT(%ebp) # non-zero preempt_count ? -+ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? - jnz restore_all - need_resched: -- movl TI_FLAGS(%ebp), %ecx # need_resched set ? -+ movl TI_flags(%ebp), %ecx # need_resched set ? - testb $_TIF_NEED_RESCHED, %cl - jz restore_all - testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? - jz restore_all -- movl $PREEMPT_ACTIVE,TI_PRE_COUNT(%ebp) -+ movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp) - sti - call schedule -- movl $0,TI_PRE_COUNT(%ebp) -+ movl $0,TI_preempt_count(%ebp) - cli - jmp need_resched - #endif -@@ -246,37 +363,50 @@ sysenter_past_esp: - pushl $(__USER_CS) - pushl $SYSENTER_RETURN - --/* -- * Load the potential sixth argument from user stack. -- * Careful about security. 
-- */ -- cmpl $__PAGE_OFFSET-3,%ebp -- jae syscall_fault --1: movl (%ebp),%ebp --.section __ex_table,"a" -- .align 4 -- .long 1b,syscall_fault --.previous -- - pushl %eax - SAVE_ALL - GET_THREAD_INFO(%ebp) - cmpl $(nr_syscalls), %eax - jae syscall_badsys - -- testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) -+ testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp) - jnz syscall_trace_entry - call *sys_call_table(,%eax,4) - movl %eax,EAX(%esp) - cli -- movl TI_FLAGS(%ebp), %ecx -+ movl TI_flags(%ebp), %ecx - testw $_TIF_ALLWORK_MASK, %cx - jne syscall_exit_work -+ -+#ifdef CONFIG_X86_SWITCH_PAGETABLES -+ -+ GET_THREAD_INFO(%ebp) -+ movl TI_virtual_stack(%ebp), %edx -+ movl TI_user_pgd(%ebp), %ecx -+ movl %esp, %ebx -+ andl $0x1fff, %ebx -+ orl %ebx, %edx -+sysexit_ret_start_marker: -+ movl %edx, %esp -+ movl %ecx, %cr3 -+#endif -+ /* -+ * only ebx is not restored by the userspace sysenter vsyscall -+ * code, it assumes it to be callee-saved. -+ */ -+ movl EBX(%esp), %ebx -+ - /* if something modifies registers it must also disable sysexit */ -+ - movl EIP(%esp), %edx - movl OLDESP(%esp), %ecx -+ - sti - sysexit -+#ifdef CONFIG_X86_SWITCH_PAGETABLES -+sysexit_ret_end_marker: -+ nop -+#endif - - - # system call handler stub -@@ -287,7 +417,7 @@ ENTRY(system_call) - cmpl $(nr_syscalls), %eax - jae syscall_badsys - # system call tracing in operation -- testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebp) -+ testb $_TIF_SYSCALL_TRACE,TI_flags(%ebp) - jnz syscall_trace_entry - syscall_call: - call *sys_call_table(,%eax,4) -@@ -296,10 +426,23 @@ syscall_exit: - cli # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret -- movl TI_FLAGS(%ebp), %ecx -+ movl TI_flags(%ebp), %ecx - testw $_TIF_ALLWORK_MASK, %cx # current->work - jne syscall_exit_work - restore_all: -+#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS -+ movl EFLAGS(%esp), %eax # mix EFLAGS and CS -+ movb CS(%esp), %al -+ testl $(VM_MASK | 3), %eax -+ jz resume_kernelX # returning to kernel or 
vm86-space -+ -+ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? -+ jz resume_kernelX -+ -+ int $3 -+ -+resume_kernelX: -+#endif - RESTORE_ALL - - # perform work that needs to be done immediately before resumption -@@ -312,7 +455,7 @@ work_resched: - cli # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret -- movl TI_FLAGS(%ebp), %ecx -+ movl TI_flags(%ebp), %ecx - andl $_TIF_WORK_MASK, %ecx # is there any work to be done other - # than syscall tracing? - jz restore_all -@@ -327,6 +470,22 @@ work_notifysig: # deal with pending s - # vm86-space - xorl %edx, %edx - call do_notify_resume -+ -+#if CONFIG_X86_HIGH_ENTRY -+ /* -+ * Reload db7 if necessary: -+ */ -+ movl TI_flags(%ebp), %ecx -+ testb $_TIF_DB7, %cl -+ jnz work_db7 -+ -+ jmp restore_all -+ -+work_db7: -+ movl TI_task(%ebp), %edx; -+ movl task_thread_db7(%edx), %edx; -+ movl %edx, %db7; -+#endif - jmp restore_all - - ALIGN -@@ -382,7 +541,7 @@ syscall_badsys: - */ - .data - ENTRY(interrupt) --.text -+.previous - - vector=0 - ENTRY(irq_entries_start) -@@ -392,7 +551,7 @@ ENTRY(irq_entries_start) - jmp common_interrupt - .data - .long 1b --.text -+.previous - vector=vector+1 - .endr - -@@ -433,12 +592,17 @@ error_code: - movl ES(%esp), %edi # get the function address - movl %eax, ORIG_EAX(%esp) - movl %ecx, ES(%esp) -- movl %esp, %edx - pushl %esi # push the error code -- pushl %edx # push the pt_regs pointer - movl $(__USER_DS), %edx - movl %edx, %ds - movl %edx, %es -+ -+/* clobbers edx, ebx and ebp */ -+ __SWITCH_KERNELSPACE -+ -+ leal 4(%esp), %edx # prepare pt_regs -+ pushl %edx # push pt_regs -+ - call *%edi - addl $8, %esp - jmp ret_from_exception -@@ -529,7 +693,7 @@ nmi_stack_correct: - pushl %edx - call do_nmi - addl $8, %esp -- RESTORE_ALL -+ jmp restore_all - - nmi_stack_fixup: - FIX_STACK(12,nmi_stack_correct, 1) -@@ -606,6 +770,8 @@ ENTRY(spurious_interrupt_bug) - pushl $do_spurious_interrupt_bug - jmp error_code - -+.previous 
-+ - .data - ENTRY(sys_call_table) - .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25/arch/i386/kernel/entry_trampoline.c 2003-12-28 23:26:36.000000000 -0800 -@@ -0,0 +1,75 @@ -+/* -+ * linux/arch/i386/kernel/entry_trampoline.c -+ * -+ * (C) Copyright 2003 Ingo Molnar -+ * -+ * This file contains the needed support code for 4GB userspace -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+extern char __entry_tramp_start, __entry_tramp_end, __start___entry_text; -+ -+void __init init_entry_mappings(void) -+{ -+#ifdef CONFIG_X86_HIGH_ENTRY -+ void *tramp; -+ -+ /* -+ * We need a high IDT and GDT for the 4G/4G split: -+ */ -+ trap_init_virtual_IDT(); -+ -+ __set_fixmap(FIX_ENTRY_TRAMPOLINE_0, __pa((unsigned long)&__entry_tramp_start), PAGE_KERNEL); -+ __set_fixmap(FIX_ENTRY_TRAMPOLINE_1, __pa((unsigned long)&__entry_tramp_start) + PAGE_SIZE, PAGE_KERNEL); -+ tramp = (void *)fix_to_virt(FIX_ENTRY_TRAMPOLINE_0); -+ -+ printk("mapped 4G/4G trampoline to %p.\n", tramp); -+ BUG_ON((void *)&__start___entry_text != tramp); -+ /* -+ * Virtual kernel stack: -+ */ -+ BUG_ON(__kmap_atomic_vaddr(KM_VSTACK0) & 8191); -+ BUG_ON(sizeof(struct desc_struct)*NR_CPUS*GDT_ENTRIES > 2*PAGE_SIZE); -+ BUG_ON((unsigned int)&__entry_tramp_end - (unsigned int)&__entry_tramp_start > 2*PAGE_SIZE); -+ -+ /* -+ * set up the initial thread's virtual stack related -+ * fields: -+ */ -+ current->thread.stack_page0 = virt_to_page((char *)current->thread_info); -+ current->thread.stack_page1 = virt_to_page((char *)current->thread_info + PAGE_SIZE); -+ current->thread_info->virtual_stack = (void *)__kmap_atomic_vaddr(KM_VSTACK0); -+ -+ __kunmap_atomic_type(KM_VSTACK0); -+ __kunmap_atomic_type(KM_VSTACK1); -+ __kmap_atomic(current->thread.stack_page0, KM_VSTACK0); -+ __kmap_atomic(current->thread.stack_page1, KM_VSTACK1); -+ -+#endif -+ printk("current: 
%p\n", current); -+ printk("current->thread_info: %p\n", current->thread_info); -+ current->thread_info->real_stack = (void *)current->thread_info; -+ current->thread_info->user_pgd = NULL; -+ current->thread.esp0 = (unsigned long)current->thread_info->real_stack + THREAD_SIZE; -+} -+ -+ -+ -+void __init entry_trampoline_setup(void) -+{ -+ /* -+ * old IRQ entries set up by the boot code will still hang -+ * around - they are a sign of hw trouble anyway, now they'll -+ * produce a double fault message. -+ */ -+ trap_init_virtual_GDT(); -+} ---- linux-2.6.0/arch/i386/kernel/head.S 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/head.S 2003-12-28 23:26:36.000000000 -0800 -@@ -16,6 +16,7 @@ - #include - #include - #include -+#include - - #define OLD_CL_MAGIC_ADDR 0x90020 - #define OLD_CL_MAGIC 0xA33F -@@ -330,7 +331,7 @@ ENTRY(stack_start) - - /* This is the default interrupt "handler" :-) */ - int_msg: -- .asciz "Unknown interrupt\n" -+ .asciz "Unknown interrupt or fault at EIP %p %p %p\n" - ALIGN - ignore_int: - cld -@@ -342,9 +343,17 @@ ignore_int: - movl $(__KERNEL_DS),%eax - movl %eax,%ds - movl %eax,%es -+ pushl 16(%esp) -+ pushl 24(%esp) -+ pushl 32(%esp) -+ pushl 40(%esp) - pushl $int_msg - call printk - popl %eax -+ popl %eax -+ popl %eax -+ popl %eax -+ popl %eax - popl %ds - popl %es - popl %edx -@@ -377,23 +386,27 @@ cpu_gdt_descr: - .fill NR_CPUS-1,8,0 # space for the other GDT descriptors - - /* -- * This is initialized to create an identity-mapping at 0-8M (for bootup -- * purposes) and another mapping of the 0-8M area at virtual address -+ * This is initialized to create an identity-mapping at 0-16M (for bootup -+ * purposes) and another mapping of the 0-16M area at virtual address - * PAGE_OFFSET. 
- */ - .org 0x1000 - ENTRY(swapper_pg_dir) - .long 0x00102007 - .long 0x00103007 -- .fill BOOT_USER_PGD_PTRS-2,4,0 -- /* default: 766 entries */ -+ .long 0x00104007 -+ .long 0x00105007 -+ .fill BOOT_USER_PGD_PTRS-4,4,0 -+ /* default: 764 entries */ - .long 0x00102007 - .long 0x00103007 -- /* default: 254 entries */ -- .fill BOOT_KERNEL_PGD_PTRS-2,4,0 -+ .long 0x00104007 -+ .long 0x00105007 -+ /* default: 252 entries */ -+ .fill BOOT_KERNEL_PGD_PTRS-4,4,0 - - /* -- * The page tables are initialized to only 8MB here - the final page -+ * The page tables are initialized to only 16MB here - the final page - * tables are set up later depending on memory size. - */ - .org 0x2000 -@@ -402,15 +415,21 @@ ENTRY(pg0) - .org 0x3000 - ENTRY(pg1) - -+.org 0x4000 -+ENTRY(pg2) -+ -+.org 0x5000 -+ENTRY(pg3) -+ - /* - * empty_zero_page must immediately follow the page tables ! (The - * initialization loop counts until empty_zero_page) - */ - --.org 0x4000 -+.org 0x6000 - ENTRY(empty_zero_page) - --.org 0x5000 -+.org 0x7000 - - /* - * Real beginning of normal "text" segment -@@ -419,12 +438,12 @@ ENTRY(stext) - ENTRY(_stext) - - /* -- * This starts the data section. Note that the above is all -- * in the text section because it has alignment requirements -- * that we cannot fulfill any other way. -+ * This starts the data section. - */ - .data - -+.align PAGE_SIZE_asm -+ - /* - * The Global Descriptor Table contains 28 quadwords, per-CPU. 
- */ -@@ -439,7 +458,9 @@ ENTRY(boot_gdt_table) - .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ - #endif -- .align L1_CACHE_BYTES -+ -+.align PAGE_SIZE_asm -+ - ENTRY(cpu_gdt_table) - .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* 0x0b reserved */ ---- linux-2.6.0/arch/i386/kernel/i386_ksyms.c 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/i386_ksyms.c 2003-12-28 23:26:36.000000000 -0800 -@@ -98,7 +98,6 @@ EXPORT_SYMBOL_NOVERS(__down_failed_inter - EXPORT_SYMBOL_NOVERS(__down_failed_trylock); - EXPORT_SYMBOL_NOVERS(__up_wakeup); - /* Networking helper routines. */ --EXPORT_SYMBOL(csum_partial_copy_generic); - /* Delay loops */ - EXPORT_SYMBOL(__ndelay); - EXPORT_SYMBOL(__udelay); -@@ -112,13 +111,17 @@ EXPORT_SYMBOL_NOVERS(__get_user_4); - EXPORT_SYMBOL(strpbrk); - EXPORT_SYMBOL(strstr); - -+#if !defined(CONFIG_X86_UACCESS_INDIRECT) - EXPORT_SYMBOL(strncpy_from_user); --EXPORT_SYMBOL(__strncpy_from_user); -+EXPORT_SYMBOL(__direct_strncpy_from_user); - EXPORT_SYMBOL(clear_user); - EXPORT_SYMBOL(__clear_user); - EXPORT_SYMBOL(__copy_from_user_ll); - EXPORT_SYMBOL(__copy_to_user_ll); - EXPORT_SYMBOL(strnlen_user); -+#else /* CONFIG_X86_UACCESS_INDIRECT */ -+EXPORT_SYMBOL(direct_csum_partial_copy_generic); -+#endif - - EXPORT_SYMBOL(dma_alloc_coherent); - EXPORT_SYMBOL(dma_free_coherent); ---- linux-2.6.0/arch/i386/kernel/i387.c 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/i387.c 2003-12-28 23:26:36.000000000 -0800 -@@ -218,6 +218,7 @@ void set_fpu_mxcsr( struct task_struct * - static int convert_fxsr_to_user( struct _fpstate __user *buf, - struct i387_fxsave_struct *fxsave ) - { -+ struct _fpreg tmp[8]; /* 80 bytes scratch area */ - unsigned long env[7]; - struct _fpreg __user *to; - struct _fpxreg *from; -@@ -234,23 +235,25 @@ static int convert_fxsr_to_user( struct - if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) - 
return 1; - -- to = &buf->_st[0]; -+ to = tmp; - from = (struct _fpxreg *) &fxsave->st_space[0]; - for ( i = 0 ; i < 8 ; i++, to++, from++ ) { - unsigned long *t = (unsigned long *)to; - unsigned long *f = (unsigned long *)from; - -- if (__put_user(*f, t) || -- __put_user(*(f + 1), t + 1) || -- __put_user(from->exponent, &to->exponent)) -- return 1; -+ *t = *f; -+ *(t + 1) = *(f+1); -+ to->exponent = from->exponent; - } -+ if (copy_to_user(buf->_st, tmp, sizeof(struct _fpreg [8]))) -+ return 1; - return 0; - } - - static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, - struct _fpstate __user *buf ) - { -+ struct _fpreg tmp[8]; /* 80 bytes scratch area */ - unsigned long env[7]; - struct _fpxreg *to; - struct _fpreg __user *from; -@@ -258,6 +261,8 @@ static int convert_fxsr_from_user( struc - - if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) - return 1; -+ if (copy_from_user(tmp, buf->_st, sizeof(struct _fpreg [8]))) -+ return 1; - - fxsave->cwd = (unsigned short)(env[0] & 0xffff); - fxsave->swd = (unsigned short)(env[1] & 0xffff); -@@ -269,15 +274,14 @@ static int convert_fxsr_from_user( struc - fxsave->fos = env[6]; - - to = (struct _fpxreg *) &fxsave->st_space[0]; -- from = &buf->_st[0]; -+ from = tmp; - for ( i = 0 ; i < 8 ; i++, to++, from++ ) { - unsigned long *t = (unsigned long *)to; - unsigned long *f = (unsigned long *)from; - -- if (__get_user(*t, f) || -- __get_user(*(t + 1), f + 1) || -- __get_user(to->exponent, &from->exponent)) -- return 1; -+ *t = *f; -+ *(t + 1) = *(f + 1); -+ to->exponent = from->exponent; - } - return 0; - } ---- linux-2.6.0/arch/i386/kernel/i8259.c 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/i8259.c 2003-12-28 23:21:44.000000000 -0800 -@@ -419,8 +419,10 @@ void __init init_IRQ(void) - * us. 
(some of these will be overridden and become - * 'special' SMP interrupts) - */ -- for (i = 0; i < NR_IRQS; i++) { -+ for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; -+ if (i >= NR_IRQS) -+ break; - if (vector != SYSCALL_VECTOR) - set_intr_gate(vector, interrupt[i]); - } ---- linux-2.6.0/arch/i386/kernel/init_task.c 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/init_task.c 2003-12-28 23:26:36.000000000 -0800 -@@ -26,7 +26,7 @@ EXPORT_SYMBOL(init_mm); - */ - union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = -- { INIT_THREAD_INFO(init_task) }; -+ { INIT_THREAD_INFO(init_task, init_thread_union) }; - - /* - * Initial task structure. -@@ -44,5 +44,5 @@ EXPORT_SYMBOL(init_task); - * section. Since TSS's are completely CPU-local, we want them - * on exact cacheline boundaries, to eliminate cacheline ping-pong. - */ --struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; -+struct tss_struct init_tss[NR_CPUS] __attribute__((__section__(".data.tss"))) = { [0 ... NR_CPUS-1] = INIT_TSS }; - ---- linux-2.6.0/arch/i386/kernel/io_apic.c 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/io_apic.c 2003-12-28 23:21:44.000000000 -0800 -@@ -76,6 +76,14 @@ static struct irq_pin_list { - int apic, pin, next; - } irq_2_pin[PIN_MAP_SIZE]; - -+#ifdef CONFIG_PCI_USE_VECTOR -+int vector_irq[NR_IRQS] = { [0 ... NR_IRQS -1] = -1}; -+#define vector_to_irq(vector) \ -+ (platform_legacy_irq(vector) ? vector : vector_irq[vector]) -+#else -+#define vector_to_irq(vector) (vector) -+#endif -+ - /* - * The common case is 1:1 IRQ<->pin mappings. Sometimes there are - * shared ISA-space IRQs, so we have to support them. 
We are super -@@ -249,7 +257,7 @@ static void clear_IO_APIC (void) - clear_IO_APIC_pin(apic, pin); - } - --static void set_ioapic_affinity(unsigned int irq, cpumask_t cpumask) -+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) - { - unsigned long flags; - int pin; -@@ -288,7 +296,7 @@ static void set_ioapic_affinity(unsigned - - extern cpumask_t irq_affinity[NR_IRQS]; - --static cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; -+cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; - - #define IRQBALANCE_CHECK_ARCH -999 - static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; -@@ -670,13 +678,11 @@ static int __init irqbalance_disable(cha - - __setup("noirqbalance", irqbalance_disable); - --static void set_ioapic_affinity(unsigned int irq, cpumask_t mask); -- - static inline void move_irq(int irq) - { - /* note - we hold the desc->lock */ - if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) { -- set_ioapic_affinity(irq, pending_irq_balance_cpumask[irq]); -+ set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]); - cpus_clear(pending_irq_balance_cpumask[irq]); - } - } -@@ -853,7 +859,7 @@ void __init setup_ioapic_dest(cpumask_t - if (irq_entry == -1) - continue; - irq = pin_2_irq(irq_entry, ioapic, pin); -- set_ioapic_affinity(irq, mask); -+ set_ioapic_affinity_irq(irq, mask); - } - - } -@@ -1141,7 +1147,8 @@ static inline int IO_APIC_irq_trigger(in - /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. 
*/ - u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 }; - --static int __init assign_irq_vector(int irq) -+#ifndef CONFIG_PCI_USE_VECTOR -+int __init assign_irq_vector(int irq) - { - static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; - BUG_ON(irq >= NR_IRQ_VECTORS); -@@ -1158,11 +1165,36 @@ next: - } - - IO_APIC_VECTOR(irq) = current_vector; -+ - return current_vector; - } -+#endif -+ -+static struct hw_interrupt_type ioapic_level_type; -+static struct hw_interrupt_type ioapic_edge_type; - --static struct hw_interrupt_type ioapic_level_irq_type; --static struct hw_interrupt_type ioapic_edge_irq_type; -+#define IOAPIC_AUTO -1 -+#define IOAPIC_EDGE 0 -+#define IOAPIC_LEVEL 1 -+ -+static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) -+{ -+ if (use_pci_vector() && !platform_legacy_irq(irq)) { -+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || -+ trigger == IOAPIC_LEVEL) -+ irq_desc[vector].handler = &ioapic_level_type; -+ else -+ irq_desc[vector].handler = &ioapic_edge_type; -+ set_intr_gate(vector, interrupt[vector]); -+ } else { -+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || -+ trigger == IOAPIC_LEVEL) -+ irq_desc[irq].handler = &ioapic_level_type; -+ else -+ irq_desc[irq].handler = &ioapic_edge_type; -+ set_intr_gate(vector, interrupt[irq]); -+ } -+} - - void __init setup_IO_APIC_irqs(void) - { -@@ -1220,13 +1252,7 @@ void __init setup_IO_APIC_irqs(void) - if (IO_APIC_IRQ(irq)) { - vector = assign_irq_vector(irq); - entry.vector = vector; -- -- if (IO_APIC_irq_trigger(irq)) -- irq_desc[irq].handler = &ioapic_level_irq_type; -- else -- irq_desc[irq].handler = &ioapic_edge_irq_type; -- -- set_intr_gate(vector, interrupt[irq]); -+ ioapic_register_intr(irq, vector, IOAPIC_AUTO); - - if (!apic && (irq < 16)) - disable_8259A_irq(irq); -@@ -1273,7 +1299,7 @@ void __init setup_ExtINT_IRQ0_pin(unsign - * The timer IRQ doesn't have to know that behind the - * scene we have a 8259A-master in AEOI 
mode ... - */ -- irq_desc[0].handler = &ioapic_edge_irq_type; -+ irq_desc[0].handler = &ioapic_edge_type; - - /* - * Add it to the IO-APIC irq-routing table: -@@ -1624,10 +1650,6 @@ static void __init setup_ioapic_ids_from - unsigned char old_id; - unsigned long flags; - -- if (acpi_ioapic) -- /* This gets done during IOAPIC enumeration for ACPI. */ -- return; -- - /* - * This is broken; anything with a real cpu count has to - * circumvent this idiocy regardless. -@@ -1763,9 +1785,6 @@ static int __init timer_irq_works(void) - * that was delayed but this is now handled in the device - * independent code. - */ --#define enable_edge_ioapic_irq unmask_IO_APIC_irq -- --static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ } - - /* - * Starting up a edge-triggered IO-APIC interrupt is -@@ -1776,7 +1795,6 @@ static void disable_edge_ioapic_irq (uns - * This is not complete - we should be able to fake - * an edge even if it isn't on the 8259A... - */ -- - static unsigned int startup_edge_ioapic_irq(unsigned int irq) - { - int was_pending = 0; -@@ -1794,8 +1812,6 @@ static unsigned int startup_edge_ioapic_ - return was_pending; - } - --#define shutdown_edge_ioapic_irq disable_edge_ioapic_irq -- - /* - * Once we have recorded IRQ_PENDING already, we can mask the - * interrupt for real. 
This prevents IRQ storms from unhandled -@@ -1810,9 +1826,6 @@ static void ack_edge_ioapic_irq(unsigned - ack_APIC_irq(); - } - --static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ } -- -- - /* - * Level triggered interrupts can just be masked, - * and shutting down and starting up the interrupt -@@ -1834,10 +1847,6 @@ static unsigned int startup_level_ioapic - return 0; /* don't check for pending */ - } - --#define shutdown_level_ioapic_irq mask_IO_APIC_irq --#define enable_level_ioapic_irq unmask_IO_APIC_irq --#define disable_level_ioapic_irq mask_IO_APIC_irq -- - static void end_level_ioapic_irq (unsigned int irq) - { - unsigned long v; -@@ -1864,6 +1873,7 @@ static void end_level_ioapic_irq (unsign - * The idea is from Manfred Spraul. --macro - */ - i = IO_APIC_VECTOR(irq); -+ - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); - - ack_APIC_irq(); -@@ -1898,7 +1908,57 @@ static void end_level_ioapic_irq (unsign - } - } - --static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ } -+#ifdef CONFIG_PCI_USE_VECTOR -+static unsigned int startup_edge_ioapic_vector(unsigned int vector) -+{ -+ int irq = vector_to_irq(vector); -+ -+ return startup_edge_ioapic_irq(irq); -+} -+ -+static void ack_edge_ioapic_vector(unsigned int vector) -+{ -+ int irq = vector_to_irq(vector); -+ -+ ack_edge_ioapic_irq(irq); -+} -+ -+static unsigned int startup_level_ioapic_vector (unsigned int vector) -+{ -+ int irq = vector_to_irq(vector); -+ -+ return startup_level_ioapic_irq (irq); -+} -+ -+static void end_level_ioapic_vector (unsigned int vector) -+{ -+ int irq = vector_to_irq(vector); -+ -+ end_level_ioapic_irq(irq); -+} -+ -+static void mask_IO_APIC_vector (unsigned int vector) -+{ -+ int irq = vector_to_irq(vector); -+ -+ mask_IO_APIC_irq(irq); -+} -+ -+static void unmask_IO_APIC_vector (unsigned int vector) -+{ -+ int irq = vector_to_irq(vector); -+ -+ unmask_IO_APIC_irq(irq); -+} -+ -+static void set_ioapic_affinity_vector (unsigned int vector, -+ 
cpumask_t cpu_mask) -+{ -+ int irq = vector_to_irq(vector); -+ -+ set_ioapic_affinity_irq(irq, cpu_mask); -+} -+#endif - - /* - * Level and edge triggered IO-APIC interrupts need different handling, -@@ -1908,26 +1968,25 @@ static void mask_and_ack_level_ioapic_ir - * edge-triggered handler, without risking IRQ storms and other ugly - * races. - */ -- --static struct hw_interrupt_type ioapic_edge_irq_type = { -+static struct hw_interrupt_type ioapic_edge_type = { - .typename = "IO-APIC-edge", -- .startup = startup_edge_ioapic_irq, -- .shutdown = shutdown_edge_ioapic_irq, -- .enable = enable_edge_ioapic_irq, -- .disable = disable_edge_ioapic_irq, -- .ack = ack_edge_ioapic_irq, -- .end = end_edge_ioapic_irq, -+ .startup = startup_edge_ioapic, -+ .shutdown = shutdown_edge_ioapic, -+ .enable = enable_edge_ioapic, -+ .disable = disable_edge_ioapic, -+ .ack = ack_edge_ioapic, -+ .end = end_edge_ioapic, - .set_affinity = set_ioapic_affinity, - }; - --static struct hw_interrupt_type ioapic_level_irq_type = { -+static struct hw_interrupt_type ioapic_level_type = { - .typename = "IO-APIC-level", -- .startup = startup_level_ioapic_irq, -- .shutdown = shutdown_level_ioapic_irq, -- .enable = enable_level_ioapic_irq, -- .disable = disable_level_ioapic_irq, -- .ack = mask_and_ack_level_ioapic_irq, -- .end = end_level_ioapic_irq, -+ .startup = startup_level_ioapic, -+ .shutdown = shutdown_level_ioapic, -+ .enable = enable_level_ioapic, -+ .disable = disable_level_ioapic, -+ .ack = mask_and_ack_level_ioapic, -+ .end = end_level_ioapic, - .set_affinity = set_ioapic_affinity, - }; - -@@ -1947,7 +2006,13 @@ static inline void init_IO_APIC_traps(vo - * 0x80, because int 0x80 is hm, kind of importantish. 
;) - */ - for (irq = 0; irq < NR_IRQS ; irq++) { -- if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) { -+ int tmp = irq; -+ if (use_pci_vector()) { -+ if (!platform_legacy_irq(tmp)) -+ if ((tmp = vector_to_irq(tmp)) == -1) -+ continue; -+ } -+ if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { - /* - * Hmm.. We don't have an entry for this, - * so default to an old-fashioned 8259 -@@ -2217,12 +2282,14 @@ void __init setup_IO_APIC(void) - /* - * Set up IO-APIC IRQ routing. - */ -- setup_ioapic_ids_from_mpc(); -+ if (!acpi_ioapic) -+ setup_ioapic_ids_from_mpc(); - sync_Arb_IDs(); - setup_IO_APIC_irqs(); - init_IO_APIC_traps(); - check_timer(); -- print_IO_APIC(); -+ if (!acpi_ioapic) -+ print_IO_APIC(); - } - - /* -@@ -2379,10 +2446,12 @@ int io_apic_set_pci_routing (int ioapic, - "IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low); - -+ if (use_pci_vector() && !platform_legacy_irq(irq)) -+ irq = IO_APIC_VECTOR(irq); - if (edge_level) { -- irq_desc[irq].handler = &ioapic_level_irq_type; -+ irq_desc[irq].handler = &ioapic_level_type; - } else { -- irq_desc[irq].handler = &ioapic_edge_irq_type; -+ irq_desc[irq].handler = &ioapic_edge_type; - } - - set_intr_gate(entry.vector, interrupt[irq]); ---- linux-2.6.0/arch/i386/kernel/irq.c 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/irq.c 2003-12-28 23:22:10.000000000 -0800 -@@ -138,17 +138,19 @@ atomic_t irq_mis_count; - - int show_interrupts(struct seq_file *p, void *v) - { -- int i, j; -+ int i = *(loff_t *) v, j; - struct irqaction * action; - unsigned long flags; - -- seq_printf(p, " "); -- for (j=0; j HEX_DIGITS) -- count = HEX_DIGITS; -- if (copy_from_user(hexnum, buffer, count)) -- return -EFAULT; -- -- /* -- * Parse the first HEX_DIGITS characters as a hex string, any non-hex char -- * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. 
-- */ -- -- for (i = 0; i < count; i++) { -- unsigned int c = hexnum[i]; -- int k; -- -- switch (c) { -- case '0' ... '9': c -= '0'; break; -- case 'a' ... 'f': c -= 'a'-10; break; -- case 'A' ... 'F': c -= 'A'-10; break; -- default: -- goto out; -- } -- cpus_shift_left(value, value, 4); -- for (k = 0; k < 4; ++k) -- if (test_bit(k, (unsigned long *)&c)) -- cpu_set(k, value); -- } --out: -- *ret = value; -- return 0; --} -- - #ifdef CONFIG_SMP - - static struct proc_dir_entry *smp_affinity_entry[NR_IRQS]; -@@ -949,20 +915,10 @@ cpumask_t irq_affinity[NR_IRQS] = { [0 . - static int irq_affinity_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) - { -- int k, len; -- cpumask_t tmp = irq_affinity[(long)data]; -- -- if (count < HEX_DIGITS+1) -+ int len = cpumask_snprintf(page, count, irq_affinity[(long)data]); -+ if (count - len < 2) - return -EINVAL; -- -- len = 0; -- for (k = 0; k < sizeof(cpumask_t)/sizeof(u16); ++k) { -- int j = sprintf(page, "%04hx", (u16)cpus_coerce(tmp)); -- len += j; -- page += j; -- cpus_shift_right(tmp, tmp, 16); -- } -- len += sprintf(page, "\n"); -+ len += sprintf(page + len, "\n"); - return len; - } - -@@ -975,7 +931,7 @@ static int irq_affinity_write_proc(struc - if (!irq_desc[irq].handler->set_affinity) - return -EIO; - -- err = parse_hex_value(buffer, count, &new_value); -+ err = cpumask_parse(buffer, count, new_value); - if (err) - return err; - -@@ -1000,10 +956,11 @@ static int irq_affinity_write_proc(struc - static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, - int count, int *eof, void *data) - { -- unsigned long *mask = (unsigned long *) data; -- if (count < HEX_DIGITS+1) -+ int len = cpumask_snprintf(page, count, *(cpumask_t *)data); -+ if (count - len < 2) - return -EINVAL; -- return sprintf (page, "%08lx\n", *mask); -+ len += sprintf(page + len, "\n"); -+ return len; - } - - static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer, -@@ -1013,7 +970,7 
@@ static int prof_cpu_mask_write_proc (str - unsigned long full_count = count, err; - cpumask_t new_value; - -- err = parse_hex_value(buffer, count, &new_value); -+ err = cpumask_parse(buffer, count, new_value); - if (err) - return err; - ---- /dev/null 2002-08-30 16:31:37.000000000 -0700 -+++ 25/arch/i386/kernel/kgdb_stub.c 2003-12-28 23:21:09.000000000 -0800 -@@ -0,0 +1,2457 @@ -+/* -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 2, or (at your option) any -+ * later version. -+ * -+ * This program is distributed in the hope that it will be useful, but -+ * WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ */ -+ -+/* -+ * Copyright (c) 2000 VERITAS Software Corporation. -+ * -+ */ -+/**************************************************************************** -+ * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ -+ * -+ * Module name: remcom.c $ -+ * Revision: 1.34 $ -+ * Date: 91/03/09 12:29:49 $ -+ * Contributor: Lake Stevens Instrument Division$ -+ * -+ * Description: low level support for gdb debugger. $ -+ * -+ * Considerations: only works on target hardware $ -+ * -+ * Written by: Glenn Engel $ -+ * Updated by: David Grothe -+ * Updated by: Robert Walsh -+ * Updated by: wangdi -+ * ModuleState: Experimental $ -+ * -+ * NOTES: See Below $ -+ * -+ * Modified for 386 by Jim Kingdon, Cygnus Support. -+ * Compatibility with 2.1.xx kernel by David Grothe -+ * -+ * Changes to allow auto initilization. All that is needed is that it -+ * be linked with the kernel and a break point (int 3) be executed. -+ * The header file defines BREAKPOINT to allow one to do -+ * this. It should also be possible, once the interrupt system is up, to -+ * call putDebugChar("+"). 
Once this is done, the remote debugger should -+ * get our attention by sending a ^C in a packet. George Anzinger -+ * -+ * Integrated into 2.2.5 kernel by Tigran Aivazian -+ * Added thread support, support for multiple processors, -+ * support for ia-32(x86) hardware debugging. -+ * Amit S. Kale ( akale@veritas.com ) -+ * -+ * Modified to support debugging over ethernet by Robert Walsh -+ * and wangdi, based on -+ * code by San Mehat. -+ * -+ * -+ * To enable debugger support, two things need to happen. One, a -+ * call to set_debug_traps() is necessary in order to allow any breakpoints -+ * or error conditions to be properly intercepted and reported to gdb. -+ * Two, a breakpoint needs to be generated to begin communication. This -+ * is most easily accomplished by a call to breakpoint(). Breakpoint() -+ * simulates a breakpoint by executing an int 3. -+ * -+ ************* -+ * -+ * The following gdb commands are supported: -+ * -+ * command function Return value -+ * -+ * g return the value of the CPU registers hex data or ENN -+ * G set the value of the CPU registers OK or ENN -+ * -+ * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN -+ * MAA..AA,LLLL: Write LLLL bytes at address AA..AA OK or ENN -+ * -+ * c Resume at current address SNN ( signal NN) -+ * cAA..AA Continue at address AA..AA SNN -+ * -+ * s Step one instruction SNN -+ * sAA..AA Step one instruction from AA..AA SNN -+ * -+ * k kill -+ * -+ * ? What was the last sigval ? SNN (signal NN) -+ * -+ * All commands and responses are sent with a packet which includes a -+ * checksum. A packet consists of -+ * -+ * $<packet info>#<checksum>. -+ * -+ * where -+ * <packet info> :: <characters representing the command or response> -+ * <checksum> :: < two hex digits computed as modulo 256 sum of <packet info> > -+ * -+ * When a packet is received, it is first acknowledged with either '+' or '-'. -+ * '+' indicates a successful transfer. '-' indicates a failed transfer.
-+ * -+ * Example: -+ * -+ * Host: Reply: -+ * $m0,10#2a +$00010203040506070809101112131415#42 -+ * -+ ****************************************************************************/ -+#define KGDB_VERSION "<20030915.1651.33>" -+#include -+#include -+#include /* for strcpy */ -+#include -+#include -+#include -+#include -+#include /* for linux pt_regs struct */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/************************************************************************ -+ * -+ * external low-level support routines -+ */ -+typedef void (*Function) (void); /* pointer to a function */ -+ -+/* Thread reference */ -+typedef unsigned char threadref[8]; -+ -+extern int tty_putDebugChar(int); /* write a single character */ -+extern int tty_getDebugChar(void); /* read and return a single char */ -+extern void tty_flushDebugChar(void); /* flush pending characters */ -+extern int eth_putDebugChar(int); /* write a single character */ -+extern int eth_getDebugChar(void); /* read and return a single char */ -+extern void eth_flushDebugChar(void); /* flush pending characters */ -+ -+/************************************************************************/ -+/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ -+/* at least NUMREGBYTES*2 are needed for register packets */ -+/* Longer buffer is needed to list all threads */ -+#define BUFMAX 400 -+ -+char *kgdb_version = KGDB_VERSION; -+ -+/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ -+int debug_regs = 0; /* set to non-zero to print registers */ -+ -+/* filled in by an external module */ -+char *gdb_module_offsets; -+ -+static const char hexchars[] = "0123456789abcdef"; -+ -+/* Number of bytes of registers. */ -+#define NUMREGBYTES 64 -+/* -+ * Note that this register image is in a different order than -+ * the register image that Linux produces at interrupt time. 
-+ * -+ * Linux's register image is defined by struct pt_regs in ptrace.h. -+ * Just why GDB uses a different order is a historical mystery. -+ */ -+enum regnames { _EAX, /* 0 */ -+ _ECX, /* 1 */ -+ _EDX, /* 2 */ -+ _EBX, /* 3 */ -+ _ESP, /* 4 */ -+ _EBP, /* 5 */ -+ _ESI, /* 6 */ -+ _EDI, /* 7 */ -+ _PC /* 8 also known as eip */ , -+ _PS /* 9 also known as eflags */ , -+ _CS, /* 10 */ -+ _SS, /* 11 */ -+ _DS, /* 12 */ -+ _ES, /* 13 */ -+ _FS, /* 14 */ -+ _GS /* 15 */ -+}; -+ -+/*************************** ASSEMBLY CODE MACROS *************************/ -+/* -+ * Put the error code here just in case the user cares. -+ * Likewise, the vector number here (since GDB only gets the signal -+ * number through the usual means, and that's not very specific). -+ * The called_from is the return address so he can tell how we entered kgdb. -+ * This will allow him to seperate out the various possible entries. -+ */ -+#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ -+ -+#define PID_MAX PID_MAX_DEFAULT -+ -+#ifdef CONFIG_SMP -+void smp_send_nmi_allbutself(void); -+#define IF_SMP(x) x -+#undef MAX_NO_CPUS -+#ifndef CONFIG_NO_KGDB_CPUS -+#define CONFIG_NO_KGDB_CPUS 2 -+#endif -+#if CONFIG_NO_KGDB_CPUS > NR_CPUS -+#define MAX_NO_CPUS NR_CPUS -+#else -+#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS -+#endif -+#define hold_init hold_on_sstep: 1, -+#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) -+#define NUM_CPUS num_online_cpus() -+#else -+#define IF_SMP(x) -+#define hold_init -+#undef MAX_NO_CPUS -+#define MAX_NO_CPUS 1 -+#define NUM_CPUS 1 -+#endif -+#define NOCPU (struct task_struct *)0xbad1fbad -+/* *INDENT-OFF* */ -+struct kgdb_info { -+ int used_malloc; -+ void *called_from; -+ long long entry_tsc; -+ int errcode; -+ int vector; -+ int print_debug_info; -+#ifdef CONFIG_SMP -+ int hold_on_sstep; -+ struct { -+ volatile struct task_struct *task; -+ int pid; -+ int hold; -+ struct pt_regs *regs; -+ } cpus_waiting[MAX_NO_CPUS]; -+#endif 
-+} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; -+ -+/* *INDENT-ON* */ -+ -+#define used_m kgdb_info.used_malloc -+/* -+ * This is a little area we set aside to contain the stack we -+ * need to build to allow gdb to call functions. We use one -+ * per cpu to avoid locking issues. We will do all this work -+ * with interrupts off so that should take care of the protection -+ * issues. -+ */ -+#define LOOKASIDE_SIZE 200 /* should be more than enough */ -+#define MALLOC_MAX 200 /* Max malloc size */ -+struct { -+ unsigned int esp; -+ int array[LOOKASIDE_SIZE]; -+} fn_call_lookaside[MAX_NO_CPUS]; -+ -+static int trap_cpu; -+static unsigned int OLD_esp; -+ -+#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] -+#define IF_BIT 0x200 -+#define TF_BIT 0x100 -+ -+#define MALLOC_ROUND 8-1 -+ -+static char malloc_array[MALLOC_MAX]; -+IF_SMP(static void to_gdb(const char *mess)); -+void * -+malloc(int size) -+{ -+ -+ if (size <= (MALLOC_MAX - used_m)) { -+ int old_used = used_m; -+ used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); -+ return &malloc_array[old_used]; -+ } else { -+ return NULL; -+ } -+} -+ -+/* -+ * I/O dispatch functions... -+ * Based upon kgdboe, either call the ethernet -+ * handler or the serial one.. -+ */ -+void -+putDebugChar(int c) -+{ -+ if (!kgdboe) { -+ tty_putDebugChar(c); -+ } else { -+ eth_putDebugChar(c); -+ } -+} -+ -+int -+getDebugChar(void) -+{ -+ if (!kgdboe) { -+ return tty_getDebugChar(); -+ } else { -+ return eth_getDebugChar(); -+ } -+} -+ -+void -+flushDebugChar(void) -+{ -+ if (!kgdboe) { -+ tty_flushDebugChar(); -+ } else { -+ eth_flushDebugChar(); -+ } -+} -+ -+/* -+ * Gdb calls functions by pushing arguments, including a return address -+ * on the stack and then adjusting EIP to point to the function. The -+ * whole assumption in GDB is that we are on a different stack than the -+ * one the "user" i.e. code that hit the break point, is on. This, of -+ * course, is not true in the kernel.
Thus various dodges are needed to -+ * do the call without directly messing with EIP (which we cannot change -+ * as it is just a location and not a register). To adjust it would then -+ * require that we move everything below EIP up or down as needed. This -+ * will not work as we may well have stack-relative pointers on the stack -+ * (such as the pointer to regs, for example). -+ -+ * So here is what we do: -+ * We detect gdb attempting to store into the stack area and instead, store -+ * into the fn_call_lookaside.array at the same relative location as if it -+ * were the area ESP pointed at. We also trap ESP modifications -+ * and use these to adjust fn_call_lookaside.esp. On entry -+ * fn_call_lookaside.esp will be set to point at the last entry in -+ * fn_call_lookaside.array. This allows us to check if it has changed, and -+ * if so, on exit, we add the registers we will use to do the move and a -+ * trap/interrupt return exit sequence. We then adjust the eflags in the -+ * regs array (remember we now have a copy in the fn_call_lookaside.array) to -+ * kill the interrupt bit, AND we change EIP to point at our set up stub. -+ * As part of the register set up we preset the registers to point at the -+ * beginning and end of the fn_call_lookaside.array, so all the stub needs to -+ * do is move words from the array to the stack until ESP= the desired value -+ * then do the iret. This will then transfer to the desired function with -+ * all the correct registers. Nifty huh?
-+ */ -+extern asmlinkage void fn_call_stub(void); -+extern asmlinkage void fn_rtn_stub(void); -+/* *INDENT-OFF* */ -+__asm__("fn_rtn_stub:\n\t" -+ "movl %eax,%esp\n\t" -+ "fn_call_stub:\n\t" -+ "1:\n\t" -+ "addl $-4,%ebx\n\t" -+ "movl (%ebx), %eax\n\t" -+ "pushl %eax\n\t" -+ "cmpl %esp,%ecx\n\t" -+ "jne 1b\n\t" -+ "popl %eax\n\t" -+ "popl %ebx\n\t" -+ "popl %ecx\n\t" -+ "iret \n\t"); -+/* *INDENT-ON* */ -+#define gdb_i386vector kgdb_info.vector -+#define gdb_i386errcode kgdb_info.errcode -+#define waiting_cpus kgdb_info.cpus_waiting -+#define remote_debug kgdb_info.print_debug_info -+#define hold_cpu(cpu) kgdb_info.cpus_waiting[cpu].hold -+/* gdb locks */ -+ -+#ifdef CONFIG_SMP -+static int in_kgdb_called; -+static spinlock_t waitlocks[MAX_NO_CPUS] = -+ {[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED }; -+/* -+ * The following array has the thread pointer of each of the "other" -+ * cpus. We make it global so it can be seen by gdb. -+ */ -+volatile int in_kgdb_entry_log[MAX_NO_CPUS]; -+volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; -+/* -+static spinlock_t continuelocks[MAX_NO_CPUS]; -+*/ -+spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; -+/* waiters on our spinlock plus us */ -+static atomic_t spinlock_waiters = ATOMIC_INIT(1); -+static int spinlock_count = 0; -+static int spinlock_cpu = 0; -+/* -+ * Note we use nested spin locks to account for the case where a break -+ * point is encountered when calling a function by user direction from -+ * kgdb. Also there is the memory exception recursion to account for. -+ * Well, yes, but this lets other cpus thru too. Lets add a -+ * cpu id to the lock. -+ */ -+#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ -+ spinlock_cpu != smp_processor_id()){\ -+ atomic_inc(&spinlock_waiters); \ -+ while (! 
spin_trylock(x)) {\ -+ in_kgdb(®s);\ -+ }\ -+ atomic_dec(&spinlock_waiters); \ -+ spinlock_count = 1; \ -+ spinlock_cpu = smp_processor_id(); \ -+ }else{ \ -+ spinlock_count++; \ -+ } -+#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) -+#else -+unsigned kgdb_spinlock = 0; -+#define KGDB_SPIN_LOCK(x) --*x -+#define KGDB_SPIN_UNLOCK(x) ++*x -+#endif -+ -+int -+hex(char ch) -+{ -+ if ((ch >= 'a') && (ch <= 'f')) -+ return (ch - 'a' + 10); -+ if ((ch >= '0') && (ch <= '9')) -+ return (ch - '0'); -+ if ((ch >= 'A') && (ch <= 'F')) -+ return (ch - 'A' + 10); -+ return (-1); -+} -+ -+/* scan for the sequence $# */ -+void -+getpacket(char *buffer) -+{ -+ unsigned char checksum; -+ unsigned char xmitcsum; -+ int i; -+ int count; -+ char ch; -+ -+ do { -+ /* wait around for the start character, ignore all other characters */ -+ while ((ch = (getDebugChar() & 0x7f)) != '$') ; -+ checksum = 0; -+ xmitcsum = -1; -+ -+ count = 0; -+ -+ /* now, read until a # or end of buffer is found */ -+ while (count < BUFMAX) { -+ ch = getDebugChar() & 0x7f; -+ if (ch == '#') -+ break; -+ checksum = checksum + ch; -+ buffer[count] = ch; -+ count = count + 1; -+ } -+ buffer[count] = 0; -+ -+ if (ch == '#') { -+ xmitcsum = hex(getDebugChar() & 0x7f) << 4; -+ xmitcsum += hex(getDebugChar() & 0x7f); -+ if ((remote_debug) && (checksum != xmitcsum)) { -+ printk -+ ("bad checksum. My count = 0x%x, sent=0x%x. 
buf=%s\n", -+ checksum, xmitcsum, buffer); -+ } -+ -+ if (checksum != xmitcsum) -+ putDebugChar('-'); /* failed checksum */ -+ else { -+ putDebugChar('+'); /* successful transfer */ -+ /* if a sequence char is present, reply the sequence ID */ -+ if (buffer[2] == ':') { -+ putDebugChar(buffer[0]); -+ putDebugChar(buffer[1]); -+ /* remove sequence chars from buffer */ -+ count = strlen(buffer); -+ for (i = 3; i <= count; i++) -+ buffer[i - 3] = buffer[i]; -+ } -+ } -+ } -+ } while (checksum != xmitcsum); -+ -+ if (remote_debug) -+ printk("R:%s\n", buffer); -+ flushDebugChar(); -+} -+ -+/* send the packet in buffer. */ -+ -+void -+putpacket(char *buffer) -+{ -+ unsigned char checksum; -+ int count; -+ char ch; -+ -+ /* $#. */ -+ -+ if (!kgdboe) { -+ do { -+ if (remote_debug) -+ printk("T:%s\n", buffer); -+ putDebugChar('$'); -+ checksum = 0; -+ count = 0; -+ -+ while ((ch = buffer[count])) { -+ putDebugChar(ch); -+ checksum += ch; -+ count += 1; -+ } -+ -+ putDebugChar('#'); -+ putDebugChar(hexchars[checksum >> 4]); -+ putDebugChar(hexchars[checksum % 16]); -+ flushDebugChar(); -+ -+ } while ((getDebugChar() & 0x7f) != '+'); -+ } else { -+ /* -+ * For udp, we can not transfer too much bytes once. 
-+ * We only transfer MAX_SEND_COUNT size bytes each time -+ */ -+ -+#define MAX_SEND_COUNT 30 -+ -+ int send_count = 0, i = 0; -+ char send_buf[MAX_SEND_COUNT]; -+ -+ do { -+ if (remote_debug) -+ printk("T:%s\n", buffer); -+ putDebugChar('$'); -+ checksum = 0; -+ count = 0; -+ send_count = 0; -+ while ((ch = buffer[count])) { -+ if (send_count >= MAX_SEND_COUNT) { -+ for(i = 0; i < MAX_SEND_COUNT; i++) { -+ putDebugChar(send_buf[i]); -+ } -+ flushDebugChar(); -+ send_count = 0; -+ } else { -+ send_buf[send_count] = ch; -+ checksum += ch; -+ count ++; -+ send_count++; -+ } -+ } -+ for(i = 0; i < send_count; i++) -+ putDebugChar(send_buf[i]); -+ putDebugChar('#'); -+ putDebugChar(hexchars[checksum >> 4]); -+ putDebugChar(hexchars[checksum % 16]); -+ flushDebugChar(); -+ } while ((getDebugChar() & 0x7f) != '+'); -+ } -+} -+ -+static char remcomInBuffer[BUFMAX]; -+static char remcomOutBuffer[BUFMAX]; -+static short error; -+ -+void -+debug_error(char *format, char *parm) -+{ -+ if (remote_debug) -+ printk(format, parm); -+} -+ -+static void -+print_regs(struct pt_regs *regs) -+{ -+ printk("EAX=%08lx ", regs->eax); -+ printk("EBX=%08lx ", regs->ebx); -+ printk("ECX=%08lx ", regs->ecx); -+ printk("EDX=%08lx ", regs->edx); -+ printk("\n"); -+ printk("ESI=%08lx ", regs->esi); -+ printk("EDI=%08lx ", regs->edi); -+ printk("EBP=%08lx ", regs->ebp); -+ printk("ESP=%08lx ", (long) ®s->esp); -+ printk("\n"); -+ printk(" DS=%08x ", regs->xds); -+ printk(" ES=%08x ", regs->xes); -+ printk(" SS=%08x ", __KERNEL_DS); -+ printk(" FL=%08lx ", regs->eflags); -+ printk("\n"); -+ printk(" CS=%08x ", regs->xcs); -+ printk(" IP=%08lx ", regs->eip); -+#if 0 -+ printk(" FS=%08x ", regs->fs); -+ printk(" GS=%08x ", regs->gs); -+#endif -+ printk("\n"); -+ -+} /* print_regs */ -+ -+#define NEW_esp fn_call_lookaside[trap_cpu].esp -+ -+static void -+regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs) -+{ -+ gdb_regs[_EAX] = regs->eax; -+ gdb_regs[_EBX] = regs->ebx; -+ gdb_regs[_ECX] = 
regs->ecx; -+ gdb_regs[_EDX] = regs->edx; -+ gdb_regs[_ESI] = regs->esi; -+ gdb_regs[_EDI] = regs->edi; -+ gdb_regs[_EBP] = regs->ebp; -+ gdb_regs[_DS] = regs->xds; -+ gdb_regs[_ES] = regs->xes; -+ gdb_regs[_PS] = regs->eflags; -+ gdb_regs[_CS] = regs->xcs; -+ gdb_regs[_PC] = regs->eip; -+ /* Note, as we are a debugging the kernel, we will always -+ * trap in kernel code, this means no priviledge change, -+ * and so the pt_regs structure is not completely valid. In a non -+ * privilege change trap, only EFLAGS, CS and EIP are put on the stack, -+ * SS and ESP are not stacked, this means that the last 2 elements of -+ * pt_regs is not valid (they would normally refer to the user stack) -+ * also, using regs+1 is no good because you end up will a value that is -+ * 2 longs (8) too high. This used to cause stepping over functions -+ * to fail, so my fix is to use the address of regs->esp, which -+ * should point at the end of the stack frame. Note I have ignored -+ * completely exceptions that cause an error code to be stacked, such -+ * as double fault. Stuart Hughes, Zentropix. -+ * original code: gdb_regs[_ESP] = (int) (regs + 1) ; -+ -+ * this is now done on entry and moved to OLD_esp (as well as NEW_esp). 
-+ */ -+ gdb_regs[_ESP] = NEW_esp; -+ gdb_regs[_SS] = __KERNEL_DS; -+ gdb_regs[_FS] = 0xFFFF; -+ gdb_regs[_GS] = 0xFFFF; -+} /* regs_to_gdb_regs */ -+ -+static void -+gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs) -+{ -+ regs->eax = gdb_regs[_EAX]; -+ regs->ebx = gdb_regs[_EBX]; -+ regs->ecx = gdb_regs[_ECX]; -+ regs->edx = gdb_regs[_EDX]; -+ regs->esi = gdb_regs[_ESI]; -+ regs->edi = gdb_regs[_EDI]; -+ regs->ebp = gdb_regs[_EBP]; -+ regs->xds = gdb_regs[_DS]; -+ regs->xes = gdb_regs[_ES]; -+ regs->eflags = gdb_regs[_PS]; -+ regs->xcs = gdb_regs[_CS]; -+ regs->eip = gdb_regs[_PC]; -+ NEW_esp = gdb_regs[_ESP]; /* keep the value */ -+#if 0 /* can't change these */ -+ regs->esp = gdb_regs[_ESP]; -+ regs->xss = gdb_regs[_SS]; -+ regs->fs = gdb_regs[_FS]; -+ regs->gs = gdb_regs[_GS]; -+#endif -+ -+} /* gdb_regs_to_regs */ -+extern void scheduling_functions_start_here(void); -+extern void scheduling_functions_end_here(void); -+#define first_sched ((unsigned long) scheduling_functions_start_here) -+#define last_sched ((unsigned long) scheduling_functions_end_here) -+ -+int thread_list = 0; -+ -+void -+get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs) -+{ -+ unsigned long stack_page; -+ int count = 0; -+ IF_SMP(int i); -+ if (!p || p == current) { -+ regs_to_gdb_regs(gdb_regs, regs); -+ return; -+ } -+#ifdef CONFIG_SMP -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ if (p == kgdb_info.cpus_waiting[i].task) { -+ regs_to_gdb_regs(gdb_regs, -+ kgdb_info.cpus_waiting[i].regs); -+ gdb_regs[_ESP] = -+ (int) &kgdb_info.cpus_waiting[i].regs->esp; -+ -+ return; -+ } -+ } -+#endif -+ memset(gdb_regs, 0, NUMREGBYTES); -+ gdb_regs[_ESP] = p->thread.esp; -+ gdb_regs[_PC] = p->thread.eip; -+ gdb_regs[_EBP] = *(int *) gdb_regs[_ESP]; -+ gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4); -+ gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8); -+ -+/* -+ * This code is to give a more informative notion of where a process -+ * is waiting. 
It is used only when the user asks for a thread info -+ * list. If he then switches to the thread, s/he will find the task -+ * is in schedule, but a back trace should show the same info we come -+ * up with. This code was shamelessly purloined from process.c. It was -+ * then enhanced to provide more registers than simply the program -+ * counter. -+ */ -+ -+ if (!thread_list) { -+ return; -+ } -+ -+ if (p->state == TASK_RUNNING) -+ return; -+ stack_page = (unsigned long) p->thread_info; -+ if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] > 8188 + stack_page) -+ return; -+ /* include/asm-i386/system.h:switch_to() pushes ebp last. */ -+ do { -+ if (gdb_regs[_EBP] < stack_page || -+ gdb_regs[_EBP] > 8184 + stack_page) -+ return; -+ gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4); -+ gdb_regs[_ESP] = gdb_regs[_EBP] + 8; -+ gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP]; -+ if (gdb_regs[_PC] < first_sched || gdb_regs[_PC] >= last_sched) -+ return; -+ } while (count++ < 16); -+ return; -+} -+ -+/* Indicate to caller of mem2hex or hex2mem that there has been an -+ error. 
*/ -+static volatile int mem_err = 0; -+static volatile int mem_err_expected = 0; -+static volatile int mem_err_cnt = 0; -+static int garbage_loc = -1; -+ -+int -+get_char(char *addr) -+{ -+ return *addr; -+} -+ -+void -+set_char(char *addr, int val, int may_fault) -+{ -+ /* -+ * This code traps references to the area mapped to the kernel -+ * stack as given by the regs and, instead, stores to the -+ * fn_call_lookaside[cpu].array -+ */ -+ if (may_fault && -+ (unsigned int) addr < OLD_esp && -+ ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) { -+ addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr); -+ } -+ *addr = val; -+} -+ -+/* convert the memory pointed to by mem into hex, placing result in buf */ -+/* return a pointer to the last char put in buf (null) */ -+/* If MAY_FAULT is non-zero, then we should set mem_err in response to -+ a fault; if zero treat a fault like any other fault in the stub. */ -+char * -+mem2hex(char *mem, char *buf, int count, int may_fault) -+{ -+ int i; -+ unsigned char ch; -+ -+ if (may_fault) { -+ mem_err_expected = 1; -+ mem_err = 0; -+ } -+ for (i = 0; i < count; i++) { -+ /* printk("%lx = ", mem) ; */ -+ -+ ch = get_char(mem++); -+ -+ /* printk("%02x\n", ch & 0xFF) ; */ -+ if (may_fault && mem_err) { -+ if (remote_debug) -+ printk("Mem fault fetching from addr %lx\n", -+ (long) (mem - 1)); -+ *buf = 0; /* truncate buffer */ -+ return (buf); -+ } -+ *buf++ = hexchars[ch >> 4]; -+ *buf++ = hexchars[ch % 16]; -+ } -+ *buf = 0; -+ if (may_fault) -+ mem_err_expected = 0; -+ return (buf); -+} -+ -+/* convert the hex array pointed to by buf into binary to be placed in mem */ -+/* return a pointer to the character AFTER the last byte written */ -+/* NOTE: We use the may fault flag to also indicate if the write is to -+ * the registers (0) or "other" memory (!=0) -+ */ -+char * -+hex2mem(char *buf, char *mem, int count, int may_fault) -+{ -+ int i; -+ unsigned char ch; -+ -+ if (may_fault) { -+ mem_err_expected = 
1; -+ mem_err = 0; -+ } -+ for (i = 0; i < count; i++) { -+ ch = hex(*buf++) << 4; -+ ch = ch + hex(*buf++); -+ set_char(mem++, ch, may_fault); -+ -+ if (may_fault && mem_err) { -+ if (remote_debug) -+ printk("Mem fault storing to addr %lx\n", -+ (long) (mem - 1)); -+ return (mem); -+ } -+ } -+ if (may_fault) -+ mem_err_expected = 0; -+ return (mem); -+} -+ -+/**********************************************/ -+/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */ -+/* RETURN NUMBER OF CHARS PROCESSED */ -+/**********************************************/ -+int -+hexToInt(char **ptr, int *intValue) -+{ -+ int numChars = 0; -+ int hexValue; -+ -+ *intValue = 0; -+ -+ while (**ptr) { -+ hexValue = hex(**ptr); -+ if (hexValue >= 0) { -+ *intValue = (*intValue << 4) | hexValue; -+ numChars++; -+ } else -+ break; -+ -+ (*ptr)++; -+ } -+ -+ return (numChars); -+} -+ -+#define stubhex(h) hex(h) -+#ifdef old_thread_list -+ -+static int -+stub_unpack_int(char *buff, int fieldlength) -+{ -+ int nibble; -+ int retval = 0; -+ -+ while (fieldlength) { -+ nibble = stubhex(*buff++); -+ retval |= nibble; -+ fieldlength--; -+ if (fieldlength) -+ retval = retval << 4; -+ } -+ return retval; -+} -+#endif -+static char * -+pack_hex_byte(char *pkt, int byte) -+{ -+ *pkt++ = hexchars[(byte >> 4) & 0xf]; -+ *pkt++ = hexchars[(byte & 0xf)]; -+ return pkt; -+} -+ -+#define BUF_THREAD_ID_SIZE 16 -+ -+static char * -+pack_threadid(char *pkt, threadref * id) -+{ -+ char *limit; -+ unsigned char *altid; -+ -+ altid = (unsigned char *) id; -+ limit = pkt + BUF_THREAD_ID_SIZE; -+ while (pkt < limit) -+ pkt = pack_hex_byte(pkt, *altid++); -+ return pkt; -+} -+ -+#ifdef old_thread_list -+static char * -+unpack_byte(char *buf, int *value) -+{ -+ *value = stub_unpack_int(buf, 2); -+ return buf + 2; -+} -+ -+static char * -+unpack_threadid(char *inbuf, threadref * id) -+{ -+ char *altref; -+ char *limit = inbuf + BUF_THREAD_ID_SIZE; -+ int x, y; -+ -+ altref = (char *) id; -+ -+ while (inbuf < limit) { -+ x 
= stubhex(*inbuf++); -+ y = stubhex(*inbuf++); -+ *altref++ = (x << 4) | y; -+ } -+ return inbuf; -+} -+#endif -+void -+int_to_threadref(threadref * id, int value) -+{ -+ unsigned char *scan; -+ -+ scan = (unsigned char *) id; -+ { -+ int i = 4; -+ while (i--) -+ *scan++ = 0; -+ } -+ *scan++ = (value >> 24) & 0xff; -+ *scan++ = (value >> 16) & 0xff; -+ *scan++ = (value >> 8) & 0xff; -+ *scan++ = (value & 0xff); -+} -+int -+int_to_hex_v(unsigned char * id, int value) -+{ -+ unsigned char *start = id; -+ int shift; -+ int ch; -+ -+ for (shift = 28; shift >= 0; shift -= 4) { -+ if ((ch = (value >> shift) & 0xf) || (id != start)) { -+ *id = hexchars[ch]; -+ id++; -+ } -+ } -+ if (id == start) -+ *id++ = '0'; -+ return id - start; -+} -+#ifdef old_thread_list -+ -+static int -+threadref_to_int(threadref * ref) -+{ -+ int i, value = 0; -+ unsigned char *scan; -+ -+ scan = (char *) ref; -+ scan += 4; -+ i = 4; -+ while (i-- > 0) -+ value = (value << 8) | ((*scan++) & 0xff); -+ return value; -+} -+#endif -+static int -+cmp_str(char *s1, char *s2, int count) -+{ -+ while (count--) { -+ if (*s1++ != *s2++) -+ return 0; -+ } -+ return 1; -+} -+ -+#if 1 /* this is a hold over from 2.4 where O(1) was "sometimes" */ -+extern struct task_struct *kgdb_get_idle(int cpu); -+#define idle_task(cpu) kgdb_get_idle(cpu) -+#else -+#define idle_task(cpu) init_tasks[cpu] -+#endif -+ -+extern int kgdb_pid_init_done; -+ -+struct task_struct * -+getthread(int pid) -+{ -+ struct task_struct *thread; -+ if (pid >= PID_MAX && pid <= (PID_MAX + MAX_NO_CPUS)) { -+ -+ return idle_task(pid - PID_MAX); -+ } else { -+ /* -+ * find_task_by_pid is relatively safe all the time -+ * Other pid functions require lock downs which imply -+ * that we may be interrupting them (as we get here -+ * in the middle of most any lock down). -+ * Still we don't want to call until the table exists! 
-+ */ -+ if (kgdb_pid_init_done){ -+ thread = find_task_by_pid(pid); -+ if (thread) { -+ return thread; -+ } -+ } -+ } -+ return NULL; -+} -+/* *INDENT-OFF* */ -+struct hw_breakpoint { -+ unsigned enabled; -+ unsigned type; -+ unsigned len; -+ unsigned addr; -+} breakinfo[4] = { {enabled:0}, -+ {enabled:0}, -+ {enabled:0}, -+ {enabled:0}}; -+/* *INDENT-ON* */ -+unsigned hw_breakpoint_status; -+void -+correct_hw_break(void) -+{ -+ int breakno; -+ int correctit; -+ int breakbit; -+ unsigned dr7; -+ -+ asm volatile ("movl %%db7, %0\n":"=r" (dr7) -+ :); -+ /* *INDENT-OFF* */ -+ do { -+ unsigned addr0, addr1, addr2, addr3; -+ asm volatile ("movl %%db0, %0\n" -+ "movl %%db1, %1\n" -+ "movl %%db2, %2\n" -+ "movl %%db3, %3\n" -+ :"=r" (addr0), "=r"(addr1), -+ "=r"(addr2), "=r"(addr3) -+ :); -+ } while (0); -+ /* *INDENT-ON* */ -+ correctit = 0; -+ for (breakno = 0; breakno < 3; breakno++) { -+ breakbit = 2 << (breakno << 1); -+ if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { -+ correctit = 1; -+ dr7 |= breakbit; -+ dr7 &= ~(0xf0000 << (breakno << 2)); -+ dr7 |= (((breakinfo[breakno].len << 2) | -+ breakinfo[breakno].type) << 16) << -+ (breakno << 2); -+ switch (breakno) { -+ case 0: -+ asm volatile ("movl %0, %%dr0\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; -+ -+ case 1: -+ asm volatile ("movl %0, %%dr1\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; -+ -+ case 2: -+ asm volatile ("movl %0, %%dr2\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; -+ -+ case 3: -+ asm volatile ("movl %0, %%dr3\n"::"r" -+ (breakinfo[breakno].addr)); -+ break; -+ } -+ } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { -+ correctit = 1; -+ dr7 &= ~breakbit; -+ dr7 &= ~(0xf0000 << (breakno << 2)); -+ } -+ } -+ if (correctit) { -+ asm volatile ("movl %0, %%db7\n"::"r" (dr7)); -+ } -+} -+ -+int -+remove_hw_break(unsigned breakno) -+{ -+ if (!breakinfo[breakno].enabled) { -+ return -1; -+ } -+ breakinfo[breakno].enabled = 0; -+ return 0; -+} -+ -+int -+set_hw_break(unsigned 
breakno, unsigned type, unsigned len, unsigned addr) -+{ -+ if (breakinfo[breakno].enabled) { -+ return -1; -+ } -+ breakinfo[breakno].enabled = 1; -+ breakinfo[breakno].type = type; -+ breakinfo[breakno].len = len; -+ breakinfo[breakno].addr = addr; -+ return 0; -+} -+ -+#ifdef CONFIG_SMP -+static int in_kgdb_console = 0; -+ -+int -+in_kgdb(struct pt_regs *regs) -+{ -+ unsigned flags; -+ int cpu = smp_processor_id(); -+ in_kgdb_called = 1; -+ if (!spin_is_locked(&kgdb_spinlock)) { -+ if (in_kgdb_here_log[cpu] || /* we are holding this cpu */ -+ in_kgdb_console) { /* or we are doing slow i/o */ -+ return 1; -+ } -+ return 0; -+ } -+ -+ /* As I see it the only reason not to let all cpus spin on -+ * the same spin_lock is to allow selected ones to proceed. -+ * This would be a good thing, so we leave it this way. -+ * Maybe someday.... Done ! -+ -+ * in_kgdb() is called from an NMI so we don't pretend -+ * to have any resources, like printk() for example. -+ */ -+ -+ kgdb_local_irq_save(flags); /* only local here, to avoid hanging */ -+ /* -+ * log arival of this cpu -+ * The NMI keeps on ticking. Protect against recurring more -+ * than once, and ignor the cpu that has the kgdb lock -+ */ -+ in_kgdb_entry_log[cpu]++; -+ in_kgdb_here_log[cpu] = regs; -+ if (cpu == spinlock_cpu || waiting_cpus[cpu].task) -+ goto exit_in_kgdb; -+ -+ /* -+ * For protection of the initilization of the spin locks by kgdb -+ * it locks the kgdb spinlock before it gets the wait locks set -+ * up. We wait here for the wait lock to be taken. If the -+ * kgdb lock goes away first?? Well, it could be a slow exit -+ * sequence where the wait lock is removed prior to the kgdb lock -+ * so if kgdb gets unlocked, we just exit. -+ */ -+ -+ while (spin_is_locked(&kgdb_spinlock) && -+ !spin_is_locked(waitlocks + cpu)) ; -+ if (!spin_is_locked(&kgdb_spinlock)) -+ goto exit_in_kgdb; -+ -+ waiting_cpus[cpu].task = current; -+ waiting_cpus[cpu].pid = (current->pid) ? 
: (PID_MAX + cpu); -+ waiting_cpus[cpu].regs = regs; -+ -+ spin_unlock_wait(waitlocks + cpu); -+ -+ /* -+ * log departure of this cpu -+ */ -+ waiting_cpus[cpu].task = 0; -+ waiting_cpus[cpu].pid = 0; -+ waiting_cpus[cpu].regs = 0; -+ correct_hw_break(); -+ exit_in_kgdb: -+ in_kgdb_here_log[cpu] = 0; -+ kgdb_local_irq_restore(flags); -+ return 1; -+ /* -+ spin_unlock(continuelocks + smp_processor_id()); -+ */ -+} -+ -+void -+smp__in_kgdb(struct pt_regs regs) -+{ -+ ack_APIC_irq(); -+ in_kgdb(®s); -+} -+#else -+int -+in_kgdb(struct pt_regs *regs) -+{ -+ return (kgdb_spinlock); -+} -+#endif -+ -+void -+printexceptioninfo(int exceptionNo, int errorcode, char *buffer) -+{ -+ unsigned dr6; -+ int i; -+ switch (exceptionNo) { -+ case 1: /* debug exception */ -+ break; -+ case 3: /* breakpoint */ -+ sprintf(buffer, "Software breakpoint"); -+ return; -+ default: -+ sprintf(buffer, "Details not available"); -+ return; -+ } -+ asm volatile ("movl %%db6, %0\n":"=r" (dr6) -+ :); -+ if (dr6 & 0x4000) { -+ sprintf(buffer, "Single step"); -+ return; -+ } -+ for (i = 0; i < 4; ++i) { -+ if (dr6 & (1 << i)) { -+ sprintf(buffer, "Hardware breakpoint %d", i); -+ return; -+ } -+ } -+ sprintf(buffer, "Unknown trap"); -+ return; -+} -+ -+/* -+ * This function does all command procesing for interfacing to gdb. -+ * -+ * NOTE: The INT nn instruction leaves the state of the interrupt -+ * enable flag UNCHANGED. That means that when this routine -+ * is entered via a breakpoint (INT 3) instruction from code -+ * that has interrupts enabled, then interrupts will STILL BE -+ * enabled when this routine is entered. The first thing that -+ * we do here is disable interrupts so as to prevent recursive -+ * entries and bothersome serial interrupts while we are -+ * trying to run the serial port in polled mode. -+ * -+ * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so -+ * it is always necessary to do a restore_flags before returning -+ * so as to let go of that lock. 
-+ */ -+int -+kgdb_handle_exception(int exceptionVector, -+ int signo, int err_code, struct pt_regs *linux_regs) -+{ -+ struct task_struct *usethread = NULL; -+ struct task_struct *thread_list_start = 0, *thread = NULL; -+ int addr, length; -+ int breakno, breaktype; -+ char *ptr; -+ int newPC; -+ threadref thref; -+ int threadid; -+ int thread_min = PID_MAX + MAX_NO_CPUS; -+#ifdef old_thread_list -+ int maxthreads; -+#endif -+ int nothreads; -+ unsigned long flags; -+ int gdb_regs[NUMREGBYTES / 4]; -+ int dr6; -+ IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ -+#define NO_NMI 1 -+#define NO_SYNC 2 -+#define regs (*linux_regs) -+#define NUMREGS NUMREGBYTES/4 -+ /* -+ * If the entry is not from the kernel then return to the Linux -+ * trap handler and let it process the interrupt normally. -+ */ -+ if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { -+ printk("ignoring non-kernel exception\n"); -+ print_regs(®s); -+ return (0); -+ } -+ /* -+ * If we're using eth mode, set the 'mode' in the netdevice. -+ */ -+ -+ if (kgdboe) -+ netpoll_set_trap(1); -+ -+ kgdb_local_irq_save(flags); -+ -+ /* Get kgdb spinlock */ -+ -+ KGDB_SPIN_LOCK(&kgdb_spinlock); -+ rdtscll(kgdb_info.entry_tsc); -+ /* -+ * We depend on this spinlock and the NMI watch dog to control the -+ * other cpus. They will arrive at "in_kgdb()" as a result of the -+ * NMI and will wait there for the following spin locks to be -+ * released. -+ */ -+#ifdef CONFIG_SMP -+ -+#if 0 -+ if (cpu_callout_map & ~MAX_CPU_MASK) { -+ printk("kgdb : too many cpus, possibly not mapped" -+ " in contiguous space, change MAX_NO_CPUS" -+ " in kgdb_stub and make new kernel.\n" -+ " cpu_callout_map is %lx\n", cpu_callout_map); -+ goto exit_just_unlock; -+ } -+#endif -+ if (spinlock_count == 1) { -+ int time, end_time, dum; -+ int i; -+ int cpu_logged_in[MAX_NO_CPUS] = {[0 ... 
MAX_NO_CPUS - 1] = (0) -+ }; -+ if (remote_debug) { -+ printk("kgdb : cpu %d entry, syncing others\n", -+ smp_processor_id()); -+ } -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ /* -+ * Use trylock as we may already hold the lock if -+ * we are holding the cpu. Net result is all -+ * locked. -+ */ -+ spin_trylock(&waitlocks[i]); -+ } -+ for (i = 0; i < MAX_NO_CPUS; i++) -+ cpu_logged_in[i] = 0; -+ /* -+ * Wait for their arrival. We know the watch dog is active if -+ * in_kgdb() has ever been called, as it is always called on a -+ * watchdog tick. -+ */ -+ rdtsc(dum, time); -+ end_time = time + 2; /* Note: we use the High order bits! */ -+ i = 1; -+ if (num_online_cpus() > 1) { -+ int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; -+ smp_send_nmi_allbutself(); -+ -+ while (i < num_online_cpus() && time != end_time) { -+ int j; -+ for (j = 0; j < MAX_NO_CPUS; j++) { -+ if (waiting_cpus[j].task && -+ waiting_cpus[j].task != NOCPU && -+ !cpu_logged_in[j]) { -+ i++; -+ cpu_logged_in[j] = 1; -+ if (remote_debug) { -+ printk -+ ("kgdb : cpu %d arrived at kgdb\n", -+ j); -+ } -+ break; -+ } else if (!waiting_cpus[j].task && -+ !cpu_online(j)) { -+ waiting_cpus[j].task = NOCPU; -+ cpu_logged_in[j] = 1; -+ waiting_cpus[j].hold = 1; -+ break; -+ } -+ if (!waiting_cpus[j].task && -+ in_kgdb_here_log[j]) { -+ -+ int wait = 100000; -+ while (wait--) ; -+ if (!waiting_cpus[j].task && -+ in_kgdb_here_log[j]) { -+ printk -+ ("kgdb : cpu %d stall" -+ " in in_kgdb\n", -+ j); -+ i++; -+ cpu_logged_in[j] = 1; -+ waiting_cpus[j].task = -+ (struct task_struct -+ *) 1; -+ } -+ } -+ } -+ -+ if (in_kgdb_entry_log[smp_processor_id()] > -+ (me_in_kgdb + 10)) { -+ break; -+ } -+ -+ rdtsc(dum, time); -+ } -+ if (i < num_online_cpus()) { -+ printk -+ ("kgdb : time out, proceeding without sync\n"); -+#if 0 -+ printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", -+ waiting_cpus[0].task != 0, -+ waiting_cpus[1].task != 0); -+ printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", -+ cpu_logged_in[0], 
cpu_logged_in[1]); -+ printk -+ ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", -+ in_kgdb_here_log[0] != 0, -+ in_kgdb_here_log[1] != 0); -+#endif -+ entry_state = NO_SYNC; -+ } else { -+#if 0 -+ int ent = -+ in_kgdb_entry_log[smp_processor_id()] - -+ me_in_kgdb; -+ printk("kgdb : sync after %d entries\n", ent); -+#endif -+ } -+ } else { -+ if (remote_debug) { -+ printk -+ ("kgdb : %d cpus, but watchdog not active\n" -+ "proceeding without locking down other cpus\n", -+ num_online_cpus()); -+ entry_state = NO_NMI; -+ } -+ } -+ } -+#endif -+ -+ if (remote_debug) { -+ unsigned long *lp = (unsigned long *) &linux_regs; -+ -+ printk("handle_exception(exceptionVector=%d, " -+ "signo=%d, err_code=%d, linux_regs=%p)\n", -+ exceptionVector, signo, err_code, linux_regs); -+ if (debug_regs) { -+ print_regs(®s); -+ printk("Stk: %8lx %8lx %8lx %8lx" -+ " %8lx %8lx %8lx %8lx\n", -+ lp[0], lp[1], lp[2], lp[3], -+ lp[4], lp[5], lp[6], lp[7]); -+ printk(" %8lx %8lx %8lx %8lx" -+ " %8lx %8lx %8lx %8lx\n", -+ lp[8], lp[9], lp[10], lp[11], -+ lp[12], lp[13], lp[14], lp[15]); -+ printk(" %8lx %8lx %8lx %8lx " -+ "%8lx %8lx %8lx %8lx\n", -+ lp[16], lp[17], lp[18], lp[19], -+ lp[20], lp[21], lp[22], lp[23]); -+ printk(" %8lx %8lx %8lx %8lx " -+ "%8lx %8lx %8lx %8lx\n", -+ lp[24], lp[25], lp[26], lp[27], -+ lp[28], lp[29], lp[30], lp[31]); -+ } -+ } -+ -+ /* Disable hardware debugging while we are in kgdb */ -+ /* Get the debug register status register */ -+/* *INDENT-OFF* */ -+ __asm__("movl %0,%%db7" -+ : /* no output */ -+ :"r"(0)); -+ -+ asm volatile ("movl %%db6, %0\n" -+ :"=r" (hw_breakpoint_status) -+ :); -+ -+/* *INDENT-ON* */ -+ switch (exceptionVector) { -+ case 0: /* divide error */ -+ case 1: /* debug exception */ -+ case 2: /* NMI */ -+ case 3: /* breakpoint */ -+ case 4: /* overflow */ -+ case 5: /* bounds check */ -+ case 6: /* invalid opcode */ -+ case 7: /* device not available */ -+ case 8: /* double fault (errcode) */ -+ case 10: /* invalid TSS (errcode) */ -+ case 
12: /* stack fault (errcode) */ -+ case 16: /* floating point error */ -+ case 17: /* alignment check (errcode) */ -+ default: /* any undocumented */ -+ break; -+ case 11: /* segment not present (errcode) */ -+ case 13: /* general protection (errcode) */ -+ case 14: /* page fault (special errcode) */ -+ case 19: /* cache flush denied */ -+ if (mem_err_expected) { -+ /* -+ * This fault occured because of the -+ * get_char or set_char routines. These -+ * two routines use either eax of edx to -+ * indirectly reference the location in -+ * memory that they are working with. -+ * For a page fault, when we return the -+ * instruction will be retried, so we -+ * have to make sure that these -+ * registers point to valid memory. -+ */ -+ mem_err = 1; /* set mem error flag */ -+ mem_err_expected = 0; -+ mem_err_cnt++; /* helps in debugging */ -+ /* make valid address */ -+ regs.eax = (long) &garbage_loc; -+ /* make valid address */ -+ regs.edx = (long) &garbage_loc; -+ if (remote_debug) -+ printk("Return after memory error: " -+ "mem_err_cnt=%d\n", mem_err_cnt); -+ if (debug_regs) -+ print_regs(®s); -+ goto exit_kgdb; -+ } -+ break; -+ } -+ if (remote_debug) -+ printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); -+ -+ gdb_i386vector = exceptionVector; -+ gdb_i386errcode = err_code; -+ kgdb_info.called_from = __builtin_return_address(0); -+#ifdef CONFIG_SMP -+ /* -+ * OK, we can now communicate, lets tell gdb about the sync. -+ * but only if we had a problem. -+ */ -+ switch (entry_state) { -+ case NO_NMI: -+ to_gdb("NMI not active, other cpus not stopped\n"); -+ break; -+ case NO_SYNC: -+ to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); -+ default:; -+ } -+ -+#endif -+/* -+ * Set up the gdb function call area. 
-+ */ -+ trap_cpu = smp_processor_id(); -+ OLD_esp = NEW_esp = (int) (&linux_regs->esp); -+ -+ IF_SMP(once_again:) -+ /* reply to host that an exception has occurred */ -+ remcomOutBuffer[0] = 'S'; -+ remcomOutBuffer[1] = hexchars[signo >> 4]; -+ remcomOutBuffer[2] = hexchars[signo % 16]; -+ remcomOutBuffer[3] = 0; -+ -+ putpacket(remcomOutBuffer); -+ -+ while (1 == 1) { -+ error = 0; -+ remcomOutBuffer[0] = 0; -+ getpacket(remcomInBuffer); -+ switch (remcomInBuffer[0]) { -+ case '?': -+ remcomOutBuffer[0] = 'S'; -+ remcomOutBuffer[1] = hexchars[signo >> 4]; -+ remcomOutBuffer[2] = hexchars[signo % 16]; -+ remcomOutBuffer[3] = 0; -+ break; -+ case 'd': -+ remote_debug = !(remote_debug); /* toggle debug flag */ -+ printk("Remote debug %s\n", -+ remote_debug ? "on" : "off"); -+ break; -+ case 'g': /* return the value of the CPU registers */ -+ get_gdb_regs(usethread, ®s, gdb_regs); -+ mem2hex((char *) gdb_regs, -+ remcomOutBuffer, NUMREGBYTES, 0); -+ break; -+ case 'G': /* set the value of the CPU registers - return OK */ -+ hex2mem(&remcomInBuffer[1], -+ (char *) gdb_regs, NUMREGBYTES, 0); -+ if (!usethread || usethread == current) { -+ gdb_regs_to_regs(gdb_regs, ®s); -+ strcpy(remcomOutBuffer, "OK"); -+ } else { -+ strcpy(remcomOutBuffer, "E00"); -+ } -+ break; -+ -+ case 'P':{ /* set the value of a single CPU register - -+ return OK */ -+ /* -+ * For some reason, gdb wants to talk about psudo -+ * registers (greater than 15). These may have -+ * meaning for ptrace, but for us it is safe to -+ * ignor them. We do this by dumping them into -+ * _GS which we also ignor, but do have memory for. -+ */ -+ int regno; -+ -+ ptr = &remcomInBuffer[1]; -+ regs_to_gdb_regs(gdb_regs, ®s); -+ if ((!usethread || usethread == current) && -+ hexToInt(&ptr, ®no) && -+ *ptr++ == '=' && (regno >= 0)) { -+ regno = -+ (regno >= NUMREGS ? 
_GS : regno); -+ hex2mem(ptr, (char *) &gdb_regs[regno], -+ 4, 0); -+ gdb_regs_to_regs(gdb_regs, ®s); -+ strcpy(remcomOutBuffer, "OK"); -+ break; -+ } -+ strcpy(remcomOutBuffer, "E01"); -+ break; -+ } -+ -+ /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ -+ case 'm': -+ /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ -+ ptr = &remcomInBuffer[1]; -+ if (hexToInt(&ptr, &addr) && -+ (*(ptr++) == ',') && (hexToInt(&ptr, &length))) { -+ ptr = 0; -+ /* -+ * hex doubles the byte count -+ */ -+ if (length > (BUFMAX / 2)) -+ length = BUFMAX / 2; -+ mem2hex((char *) addr, -+ remcomOutBuffer, length, 1); -+ if (mem_err) { -+ strcpy(remcomOutBuffer, "E03"); -+ debug_error("memory fault\n", NULL); -+ } -+ } -+ -+ if (ptr) { -+ strcpy(remcomOutBuffer, "E01"); -+ debug_error -+ ("malformed read memory command: %s\n", -+ remcomInBuffer); -+ } -+ break; -+ -+ /* MAA..AA,LLLL: -+ Write LLLL bytes at address AA.AA return OK */ -+ case 'M': -+ /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ -+ ptr = &remcomInBuffer[1]; -+ if (hexToInt(&ptr, &addr) && -+ (*(ptr++) == ',') && -+ (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) { -+ hex2mem(ptr, (char *) addr, length, 1); -+ -+ if (mem_err) { -+ strcpy(remcomOutBuffer, "E03"); -+ debug_error("memory fault\n", NULL); -+ } else { -+ strcpy(remcomOutBuffer, "OK"); -+ } -+ -+ ptr = 0; -+ } -+ if (ptr) { -+ strcpy(remcomOutBuffer, "E02"); -+ debug_error -+ ("malformed write memory command: %s\n", -+ remcomInBuffer); -+ } -+ break; -+ case 'S': -+ remcomInBuffer[0] = 's'; -+ case 'C': -+ /* Csig;AA..AA where ;AA..AA is optional -+ * continue with signal -+ * Since signals are meaning less to us, delete that -+ * part and then fall into the 'c' code. 
-+ */ -+ ptr = &remcomInBuffer[1]; -+ length = 2; -+ while (*ptr && *ptr != ';') { -+ length++; -+ ptr++; -+ } -+ if (*ptr) { -+ do { -+ ptr++; -+ *(ptr - length++) = *ptr; -+ } while (*ptr); -+ } else { -+ remcomInBuffer[1] = 0; -+ } -+ -+ /* cAA..AA Continue at address AA..AA(optional) */ -+ /* sAA..AA Step one instruction from AA..AA(optional) */ -+ /* D detach, reply OK and then continue */ -+ case 'c': -+ case 's': -+ case 'D': -+ -+ /* try to read optional parameter, -+ pc unchanged if no parm */ -+ ptr = &remcomInBuffer[1]; -+ if (hexToInt(&ptr, &addr)) { -+ if (remote_debug) -+ printk("Changing EIP to 0x%x\n", addr); -+ -+ regs.eip = addr; -+ } -+ -+ newPC = regs.eip; -+ -+ /* clear the trace bit */ -+ regs.eflags &= 0xfffffeff; -+ -+ /* set the trace bit if we're stepping */ -+ if (remcomInBuffer[0] == 's') -+ regs.eflags |= 0x100; -+ -+ /* detach is a friendly version of continue. Note that -+ debugging is still enabled (e.g hit control C) -+ */ -+ if (remcomInBuffer[0] == 'D') { -+ strcpy(remcomOutBuffer, "OK"); -+ putpacket(remcomOutBuffer); -+ } -+ -+ if (remote_debug) { -+ printk("Resuming execution\n"); -+ print_regs(®s); -+ } -+ asm volatile ("movl %%db6, %0\n":"=r" (dr6) -+ :); -+ if (!(dr6 & 0x4000)) { -+ for (breakno = 0; breakno < 4; ++breakno) { -+ if (dr6 & (1 << breakno) && -+ (breakinfo[breakno].type == 0)) { -+ /* Set restore flag */ -+ regs.eflags |= 0x10000; -+ break; -+ } -+ } -+ } -+ -+ if (kgdboe) -+ netpoll_set_trap(0); -+ -+ correct_hw_break(); -+ asm volatile ("movl %0, %%db6\n"::"r" (0)); -+ goto exit_kgdb; -+ -+ /* kill the program */ -+ case 'k': /* do nothing */ -+ break; -+ -+ /* query */ -+ case 'q': -+ nothreads = 0; -+ switch (remcomInBuffer[1]) { -+ case 'f': -+ threadid = 1; -+ thread_list = 2; -+ thread_list_start = (usethread ? 
: current); -+ case 's': -+ if (!cmp_str(&remcomInBuffer[2], -+ "ThreadInfo", 10)) -+ break; -+ -+ remcomOutBuffer[nothreads++] = 'm'; -+ for (; threadid < PID_MAX + MAX_NO_CPUS; -+ threadid++) { -+ thread = getthread(threadid); -+ if (thread) { -+ nothreads += int_to_hex_v( -+ &remcomOutBuffer[ -+ nothreads], -+ threadid); -+ if (thread_min > threadid) -+ thread_min = threadid; -+ remcomOutBuffer[ -+ nothreads] = ','; -+ nothreads++; -+ if (nothreads > BUFMAX - 10) -+ break; -+ } -+ } -+ if (remcomOutBuffer[nothreads - 1] == 'm') { -+ remcomOutBuffer[nothreads - 1] = 'l'; -+ } else { -+ nothreads--; -+ } -+ remcomOutBuffer[nothreads] = 0; -+ break; -+ -+#ifdef old_thread_list /* Old thread info request */ -+ case 'L': -+ /* List threads */ -+ thread_list = 2; -+ thread_list_start = (usethread ? : current); -+ unpack_byte(remcomInBuffer + 3, &maxthreads); -+ unpack_threadid(remcomInBuffer + 5, &thref); -+ do { -+ int buf_thread_limit = -+ (BUFMAX - 22) / BUF_THREAD_ID_SIZE; -+ if (maxthreads > buf_thread_limit) { -+ maxthreads = buf_thread_limit; -+ } -+ } while (0); -+ remcomOutBuffer[0] = 'q'; -+ remcomOutBuffer[1] = 'M'; -+ remcomOutBuffer[4] = '0'; -+ pack_threadid(remcomOutBuffer + 5, &thref); -+ -+ threadid = threadref_to_int(&thref); -+ for (nothreads = 0; -+ nothreads < maxthreads && -+ threadid < PID_MAX + MAX_NO_CPUS; -+ threadid++) { -+ thread = getthread(threadid); -+ if (thread) { -+ int_to_threadref(&thref, -+ threadid); -+ pack_threadid(remcomOutBuffer + -+ 21 + -+ nothreads * 16, -+ &thref); -+ nothreads++; -+ if (thread_min > threadid) -+ thread_min = threadid; -+ } -+ } -+ -+ if (threadid == PID_MAX + MAX_NO_CPUS) { -+ remcomOutBuffer[4] = '1'; -+ } -+ pack_hex_byte(remcomOutBuffer + 2, nothreads); -+ remcomOutBuffer[21 + nothreads * 16] = '\0'; -+ break; -+#endif -+ case 'C': -+ /* Current thread id */ -+ remcomOutBuffer[0] = 'Q'; -+ remcomOutBuffer[1] = 'C'; -+ threadid = current->pid; -+ if (!threadid) { -+ /* -+ * idle thread -+ */ -+ for 
(threadid = PID_MAX; -+ threadid < PID_MAX + MAX_NO_CPUS; -+ threadid++) { -+ if (current == -+ idle_task(threadid - -+ PID_MAX)) -+ break; -+ } -+ } -+ int_to_threadref(&thref, threadid); -+ pack_threadid(remcomOutBuffer + 2, &thref); -+ remcomOutBuffer[18] = '\0'; -+ break; -+ -+ case 'E': -+ /* Print exception info */ -+ printexceptioninfo(exceptionVector, -+ err_code, remcomOutBuffer); -+ break; -+ case 'T':{ -+ char * nptr; -+ /* Thread extra info */ -+ if (!cmp_str(&remcomInBuffer[2], -+ "hreadExtraInfo,", 15)) { -+ break; -+ } -+ ptr = &remcomInBuffer[17]; -+ hexToInt(&ptr, &threadid); -+ thread = getthread(threadid); -+ nptr = &thread->comm[0]; -+ length = 0; -+ ptr = &remcomOutBuffer[0]; -+ do { -+ length++; -+ ptr = pack_hex_byte(ptr, *nptr++); -+ } while (*nptr && length < 16); -+ /* -+ * would like that 16 to be the size of -+ * task_struct.comm but don't know the -+ * syntax.. -+ */ -+ *ptr = 0; -+ } -+ } -+ break; -+ -+ /* task related */ -+ case 'H': -+ switch (remcomInBuffer[1]) { -+ case 'g': -+ ptr = &remcomInBuffer[2]; -+ hexToInt(&ptr, &threadid); -+ thread = getthread(threadid); -+ if (!thread) { -+ remcomOutBuffer[0] = 'E'; -+ remcomOutBuffer[1] = '\0'; -+ break; -+ } -+ /* -+ * Just in case I forget what this is all about, -+ * the "thread info" command to gdb causes it -+ * to ask for a thread list. It then switches -+ * to each thread and asks for the registers. -+ * For this (and only this) usage, we want to -+ * fudge the registers of tasks not on the run -+ * list (i.e. waiting) to show the routine that -+ * called schedule. Also, gdb, is a minimalist -+ * in that if the current thread is the last -+ * it will not re-read the info when done. -+ * This means that in this case we must show -+ * the real registers. So here is how we do it: -+ * Each entry we keep track of the min -+ * thread in the list (the last that gdb will) -+ * get info for. We also keep track of the -+ * starting thread. 
-+ * "thread_list" is cleared when switching back -+ * to the min thread if it is was current, or -+ * if it was not current, thread_list is set -+ * to 1. When the switch to current comes, -+ * if thread_list is 1, clear it, else do -+ * nothing. -+ */ -+ usethread = thread; -+ if ((thread_list == 1) && -+ (thread == thread_list_start)) { -+ thread_list = 0; -+ } -+ if (thread_list && (threadid == thread_min)) { -+ if (thread == thread_list_start) { -+ thread_list = 0; -+ } else { -+ thread_list = 1; -+ } -+ } -+ /* follow through */ -+ case 'c': -+ remcomOutBuffer[0] = 'O'; -+ remcomOutBuffer[1] = 'K'; -+ remcomOutBuffer[2] = '\0'; -+ break; -+ } -+ break; -+ -+ /* Query thread status */ -+ case 'T': -+ ptr = &remcomInBuffer[1]; -+ hexToInt(&ptr, &threadid); -+ thread = getthread(threadid); -+ if (thread) { -+ remcomOutBuffer[0] = 'O'; -+ remcomOutBuffer[1] = 'K'; -+ remcomOutBuffer[2] = '\0'; -+ if (thread_min > threadid) -+ thread_min = threadid; -+ } else { -+ remcomOutBuffer[0] = 'E'; -+ remcomOutBuffer[1] = '\0'; -+ } -+ break; -+ -+ case 'Y': /* set up a hardware breakpoint */ -+ ptr = &remcomInBuffer[1]; -+ hexToInt(&ptr, &breakno); -+ ptr++; -+ hexToInt(&ptr, &breaktype); -+ ptr++; -+ hexToInt(&ptr, &length); -+ ptr++; -+ hexToInt(&ptr, &addr); -+ if (set_hw_break(breakno & 0x3, -+ breaktype & 0x3, -+ length & 0x3, addr) == 0) { -+ strcpy(remcomOutBuffer, "OK"); -+ } else { -+ strcpy(remcomOutBuffer, "ERROR"); -+ } -+ break; -+ -+ /* Remove hardware breakpoint */ -+ case 'y': -+ ptr = &remcomInBuffer[1]; -+ hexToInt(&ptr, &breakno); -+ if (remove_hw_break(breakno & 0x3) == 0) { -+ strcpy(remcomOutBuffer, "OK"); -+ } else { -+ strcpy(remcomOutBuffer, "ERROR"); -+ } -+ break; -+ -+ case 'r': /* reboot */ -+ strcpy(remcomOutBuffer, "OK"); -+ putpacket(remcomOutBuffer); -+ /*to_gdb("Rebooting\n"); */ -+ /* triplefault no return from here */ -+ { -+ static long no_idt[2]; -+ __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); -+ BREAKPOINT; -+ } -+ -+ } /* switch 
*/ -+ -+ /* reply to the request */ -+ putpacket(remcomOutBuffer); -+ } /* while(1==1) */ -+ /* -+ * reached by goto only. -+ */ -+ exit_kgdb: -+ /* -+ * Here is where we set up to trap a gdb function call. NEW_esp -+ * will be changed if we are trying to do this. We handle both -+ * adding and subtracting, thus allowing gdb to put grung on -+ * the stack which it removes later. -+ */ -+ if (NEW_esp != OLD_esp) { -+ int *ptr = END_OF_LOOKASIDE; -+ if (NEW_esp < OLD_esp) -+ ptr -= (OLD_esp - NEW_esp) / sizeof (int); -+ *--ptr = linux_regs->eflags; -+ *--ptr = linux_regs->xcs; -+ *--ptr = linux_regs->eip; -+ *--ptr = linux_regs->ecx; -+ *--ptr = linux_regs->ebx; -+ *--ptr = linux_regs->eax; -+ linux_regs->ecx = NEW_esp - (sizeof (int) * 6); -+ linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; -+ if (NEW_esp < OLD_esp) { -+ linux_regs->eip = (unsigned int) fn_call_stub; -+ } else { -+ linux_regs->eip = (unsigned int) fn_rtn_stub; -+ linux_regs->eax = NEW_esp; -+ } -+ linux_regs->eflags &= ~(IF_BIT | TF_BIT); -+ } -+#ifdef CONFIG_SMP -+ /* -+ * Release gdb wait locks -+ * Sanity check time. Must have at least one cpu to run. Also single -+ * step must not be done if the current cpu is on hold. -+ */ -+ if (spinlock_count == 1) { -+ int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; -+ int cpu_avail = 0; -+ int i; -+ -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ if (!cpu_online(i)) -+ break; -+ if (!hold_cpu(i)) { -+ cpu_avail = 1; -+ } -+ } -+ /* -+ * Early in the bring up there will be NO cpus on line... 
-+ */ -+ if (!cpu_avail && !cpus_empty(cpu_online_map)) { -+ to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); -+ goto once_again; -+ } -+ if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { -+ to_gdb -+ ("Current cpu must be unblocked to single step\n"); -+ goto once_again; -+ } -+ if (!(ss_hold)) { -+ int i; -+ for (i = 0; i < MAX_NO_CPUS; i++) { -+ if (!hold_cpu(i)) { -+ spin_unlock(&waitlocks[i]); -+ } -+ } -+ } else { -+ spin_unlock(&waitlocks[smp_processor_id()]); -+ } -+ /* Release kgdb spinlock */ -+ KGDB_SPIN_UNLOCK(&kgdb_spinlock); -+ /* -+ * If this cpu is on hold, this is where we -+ * do it. Note, the NMI will pull us out of here, -+ * but will return as the above lock is not held. -+ * We will stay here till another cpu releases the lock for us. -+ */ -+ spin_unlock_wait(waitlocks + smp_processor_id()); -+ kgdb_local_irq_restore(flags); -+ return (0); -+ } -+#if 0 -+exit_just_unlock: -+#endif -+#endif -+ /* Release kgdb spinlock */ -+ KGDB_SPIN_UNLOCK(&kgdb_spinlock); -+ kgdb_local_irq_restore(flags); -+ return (0); -+} -+ -+/* this function is used to set up exception handlers for tracing and -+ * breakpoints. -+ * This function is not needed as the above line does all that is needed. -+ * We leave it for backward compatitability... -+ */ -+void -+set_debug_traps(void) -+{ -+ /* -+ * linux_debug_hook is defined in traps.c. We store a pointer -+ * to our own exception handler into it. -+ -+ * But really folks, every hear of labeled common, an old Fortran -+ * concept. Lots of folks can reference it and it is define if -+ * anyone does. Only one can initialize it at link time. We do -+ * this with the hook. See the statement above. No need for any -+ * executable code and it is ready as soon as the kernel is -+ * loaded. Very desirable in kernel debugging. -+ -+ linux_debug_hook = handle_exception ; -+ */ -+ -+ /* In case GDB is started before us, ack any packets (presumably -+ "$?#xx") sitting there. 
-+ putDebugChar ('+'); -+ -+ initialized = 1; -+ */ -+} -+ -+/* This function will generate a breakpoint exception. It is used at the -+ beginning of a program to sync up with a debugger and can be used -+ otherwise as a quick means to stop program execution and "break" into -+ the debugger. */ -+/* But really, just use the BREAKPOINT macro. We will handle the int stuff -+ */ -+ -+#ifdef later -+/* -+ * possibly we should not go thru the traps.c code at all? Someday. -+ */ -+void -+do_kgdb_int3(struct pt_regs *regs, long error_code) -+{ -+ kgdb_handle_exception(3, 5, error_code, regs); -+ return; -+} -+#endif -+#undef regs -+#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS -+asmlinkage void -+bad_sys_call_exit(int stuff) -+{ -+ struct pt_regs *regs = (struct pt_regs *) &stuff; -+ printk("Sys call %d return with %x preempt_count\n", -+ (int) regs->orig_eax, preempt_count()); -+} -+#endif -+#ifdef CONFIG_STACK_OVERFLOW_TEST -+#include -+asmlinkage void -+stack_overflow(void) -+{ -+#ifdef BREAKPOINT -+ BREAKPOINT; -+#else -+ printk("Kernel stack overflow, looping forever\n"); -+#endif -+ while (1) { -+ } -+} -+#endif -+ -+#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) -+char gdbconbuf[BUFMAX]; -+ -+static void -+kgdb_gdb_message(const char *s, unsigned count) -+{ -+ int i; -+ int wcount; -+ char *bufptr; -+ /* -+ * This takes care of NMI while spining out chars to gdb -+ */ -+ IF_SMP(in_kgdb_console = 1); -+ gdbconbuf[0] = 'O'; -+ bufptr = gdbconbuf + 1; -+ while (count > 0) { -+ if ((count << 1) > (BUFMAX - 2)) { -+ wcount = (BUFMAX - 2) >> 1; -+ } else { -+ wcount = count; -+ } -+ count -= wcount; -+ for (i = 0; i < wcount; i++) { -+ bufptr = pack_hex_byte(bufptr, s[i]); -+ } -+ *bufptr = '\0'; -+ s += wcount; -+ -+ putpacket(gdbconbuf); -+ -+ } -+ IF_SMP(in_kgdb_console = 0); -+} -+#endif -+#ifdef CONFIG_SMP -+static void -+to_gdb(const char *s) -+{ -+ int count = 0; -+ while (s[count] && (count++ < BUFMAX)) ; -+ kgdb_gdb_message(s, count); -+} -+#endif -+#ifdef 
CONFIG_KGDB_CONSOLE -+#include -+#include -+#include -+#include -+#include -+ -+void -+kgdb_console_write(struct console *co, const char *s, unsigned count) -+{ -+ -+ if (gdb_i386vector == -1) { -+ /* -+ * We have not yet talked to gdb. What to do... -+ * lets break, on continue we can do the write. -+ * But first tell him whats up. Uh, well no can do, -+ * as this IS the console. Oh well... -+ * We do need to wait or the messages will be lost. -+ * Other option would be to tell the above code to -+ * ignore this breakpoint and do an auto return, -+ * but that might confuse gdb. Also this happens -+ * early enough in boot up that we don't have the traps -+ * set up yet, so... -+ */ -+ breakpoint(); -+ } -+ kgdb_gdb_message(s, count); -+} -+ -+/* -+ * ------------------------------------------------------------ -+ * Serial KGDB driver -+ * ------------------------------------------------------------ -+ */ -+ -+static struct console kgdbcons = { -+ name:"kgdb", -+ write:kgdb_console_write, -+#ifdef CONFIG_KGDB_USER_CONSOLE -+ device:kgdb_console_device, -+#endif -+ flags:CON_PRINTBUFFER | CON_ENABLED, -+ index:-1, -+}; -+ -+/* -+ * The trick here is that this file gets linked before printk.o -+ * That means we get to peer at the console info in the command -+ * line before it does. If we are up, we register, otherwise, -+ * do nothing. By returning 0, we allow printk to look also. -+ */ -+static int kgdb_console_enabled; -+ -+int __init -+kgdb_console_init(char *str) -+{ -+ if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { -+ register_console(&kgdbcons); -+ kgdb_console_enabled = 1; -+ } -+ return 0; /* let others look at the string */ -+} -+ -+__setup("console=", kgdb_console_init); -+ -+#ifdef CONFIG_KGDB_USER_CONSOLE -+static kdev_t kgdb_console_device(struct console *c); -+/* This stuff sort of works, but it knocks out telnet devices -+ * we are leaving it here in case we (or you) find time to figure it out -+ * better.. 
-+ */ -+ -+/* -+ * We need a real char device as well for when the console is opened for user -+ * space activities. -+ */ -+ -+static int -+kgdb_consdev_open(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static ssize_t -+kgdb_consdev_write(struct file *file, const char *buf, -+ size_t count, loff_t * ppos) -+{ -+ int size, ret = 0; -+ static char kbuf[128]; -+ static DECLARE_MUTEX(sem); -+ -+ /* We are not reentrant... */ -+ if (down_interruptible(&sem)) -+ return -ERESTARTSYS; -+ -+ while (count > 0) { -+ /* need to copy the data from user space */ -+ size = count; -+ if (size > sizeof (kbuf)) -+ size = sizeof (kbuf); -+ if (copy_from_user(kbuf, buf, size)) { -+ ret = -EFAULT; -+ break;; -+ } -+ kgdb_console_write(&kgdbcons, kbuf, size); -+ count -= size; -+ ret += size; -+ buf += size; -+ } -+ -+ up(&sem); -+ -+ return ret; -+} -+ -+struct file_operations kgdb_consdev_fops = { -+ open:kgdb_consdev_open, -+ write:kgdb_consdev_write -+}; -+static kdev_t -+kgdb_console_device(struct console *c) -+{ -+ return MKDEV(TTYAUX_MAJOR, 1); -+} -+ -+/* -+ * This routine gets called from the serial stub in the i386/lib -+ * This is so it is done late in bring up (just before the console open). 
-+ */ -+void -+kgdb_console_finit(void) -+{ -+ if (kgdb_console_enabled) { -+ char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); -+ char *cp = cptr; -+ while (*cptr && *cptr != '(') -+ cptr++; -+ *cptr = 0; -+ unregister_chrdev(TTYAUX_MAJOR, cp); -+ register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); -+ } -+} -+#endif -+#endif -+#ifdef CONFIG_KGDB_TS -+#include /* time stamp code */ -+#include /* in_interrupt */ -+#ifdef CONFIG_KGDB_TS_64 -+#define DATA_POINTS 64 -+#endif -+#ifdef CONFIG_KGDB_TS_128 -+#define DATA_POINTS 128 -+#endif -+#ifdef CONFIG_KGDB_TS_256 -+#define DATA_POINTS 256 -+#endif -+#ifdef CONFIG_KGDB_TS_512 -+#define DATA_POINTS 512 -+#endif -+#ifdef CONFIG_KGDB_TS_1024 -+#define DATA_POINTS 1024 -+#endif -+#ifndef DATA_POINTS -+#define DATA_POINTS 128 /* must be a power of two */ -+#endif -+#define INDEX_MASK (DATA_POINTS - 1) -+#if (INDEX_MASK & DATA_POINTS) -+#error "CONFIG_KGDB_TS_COUNT must be a power of 2" -+#endif -+struct kgdb_and_then_struct { -+#ifdef CONFIG_SMP -+ int on_cpu; -+#endif -+ struct task_struct *task; -+ long long at_time; -+ int from_ln; -+ char *in_src; -+ void *from; -+ int *with_shpf; -+ int data0; -+ int data1; -+}; -+struct kgdb_and_then_struct2 { -+#ifdef CONFIG_SMP -+ int on_cpu; -+#endif -+ struct task_struct *task; -+ long long at_time; -+ int from_ln; -+ char *in_src; -+ void *from; -+ int *with_shpf; -+ struct task_struct *t1; -+ struct task_struct *t2; -+}; -+struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; -+ -+struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; -+int kgdb_and_then_count; -+ -+void -+kgdb_tstamp(int line, char *source, int data0, int data1) -+{ -+ static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; -+ int flags; -+ kgdb_local_irq_save(flags); -+ spin_lock(&ts_spin); -+ rdtscll(kgdb_and_then->at_time); -+#ifdef CONFIG_SMP -+ kgdb_and_then->on_cpu = smp_processor_id(); -+#endif -+ kgdb_and_then->task = current; -+ kgdb_and_then->from_ln = line; -+ kgdb_and_then->in_src = source; -+ 
kgdb_and_then->from = __builtin_return_address(0); -+ kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | -+ (preempt_count() << 8)); -+ kgdb_and_then->data0 = data0; -+ kgdb_and_then->data1 = data1; -+ kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; -+ spin_unlock(&ts_spin); -+ kgdb_local_irq_restore(flags); -+#ifdef CONFIG_PREEMPT -+ -+#endif -+ return; -+} -+#endif -+typedef int gdb_debug_hook(int exceptionVector, -+ int signo, int err_code, struct pt_regs *linux_regs); -+gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ -+ -+static int kgdb_need_breakpoint[NR_CPUS]; -+ -+void kgdb_schedule_breakpoint(void) -+{ -+ kgdb_need_breakpoint[smp_processor_id()] = 1; -+} -+ -+void kgdb_process_breakpoint(void) -+{ -+ /* -+ * Handle a breakpoint queued from inside network driver code -+ * to avoid reentrancy issues -+ */ -+ if (kgdb_need_breakpoint[smp_processor_id()]) { -+ kgdb_need_breakpoint[smp_processor_id()] = 0; -+ BREAKPOINT; -+ } -+} -+ ---- linux-2.6.0/arch/i386/kernel/ldt.c 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/ldt.c 2003-12-28 23:26:36.000000000 -0800 -@@ -2,7 +2,7 @@ - * linux/kernel/ldt.c - * - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds -- * Copyright (C) 1999 Ingo Molnar -+ * Copyright (C) 1999, 2003 Ingo Molnar - */ - - #include -@@ -18,6 +18,8 @@ - #include - #include - #include -+#include -+#include - - #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ - static void flush_ldt(void *null) -@@ -29,34 +31,31 @@ static void flush_ldt(void *null) - - static int alloc_ldt(mm_context_t *pc, int mincount, int reload) - { -- void *oldldt; -- void *newldt; -- int oldsize; -+ int oldsize, newsize, i; - - if (mincount <= pc->size) - return 0; -+ /* -+ * LDT got larger - reallocate if necessary. 
-+ */ - oldsize = pc->size; - mincount = (mincount+511)&(~511); -- if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) -- newldt = vmalloc(mincount*LDT_ENTRY_SIZE); -- else -- newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); -- -- if (!newldt) -- return -ENOMEM; -- -- if (oldsize) -- memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); -- oldldt = pc->ldt; -- memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); -- pc->ldt = newldt; -- wmb(); -+ newsize = mincount*LDT_ENTRY_SIZE; -+ for (i = 0; i < newsize; i += PAGE_SIZE) { -+ int nr = i/PAGE_SIZE; -+ BUG_ON(i >= 64*1024); -+ if (!pc->ldt_pages[nr]) { -+ pc->ldt_pages[nr] = alloc_page(GFP_HIGHUSER); -+ if (!pc->ldt_pages[nr]) -+ return -ENOMEM; -+ clear_highpage(pc->ldt_pages[nr]); -+ } -+ } - pc->size = mincount; -- wmb(); -- - if (reload) { - #ifdef CONFIG_SMP - cpumask_t mask; -+ - preempt_disable(); - load_LDT(pc); - mask = cpumask_of_cpu(smp_processor_id()); -@@ -67,21 +66,20 @@ static int alloc_ldt(mm_context_t *pc, i - load_LDT(pc); - #endif - } -- if (oldsize) { -- if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) -- vfree(oldldt); -- else -- kfree(oldldt); -- } - return 0; - } - - static inline int copy_ldt(mm_context_t *new, mm_context_t *old) - { -- int err = alloc_ldt(new, old->size, 0); -- if (err < 0) -+ int i, err, size = old->size, nr_pages = (size*LDT_ENTRY_SIZE + PAGE_SIZE-1)/PAGE_SIZE; -+ -+ err = alloc_ldt(new, size, 0); -+ if (err < 0) { -+ new->size = 0; - return err; -- memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); -+ } -+ for (i = 0; i < nr_pages; i++) -+ copy_user_highpage(new->ldt_pages[i], old->ldt_pages[i], 0); - return 0; - } - -@@ -96,6 +94,7 @@ int init_new_context(struct task_struct - - init_MUTEX(&mm->context.sem); - mm->context.size = 0; -+ memset(mm->context.ldt_pages, 0, sizeof(struct page *) * MAX_LDT_PAGES); - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); -@@ -107,23 +106,21 @@ int init_new_context(struct 
task_struct - - /* - * No need to lock the MM as we are the last user -+ * Do not touch the ldt register, we are already -+ * in the next thread. - */ - void destroy_context(struct mm_struct *mm) - { -- if (mm->context.size) { -- if (mm == current->active_mm) -- clear_LDT(); -- if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) -- vfree(mm->context.ldt); -- else -- kfree(mm->context.ldt); -- mm->context.size = 0; -- } -+ int i, nr_pages = (mm->context.size*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; -+ -+ for (i = 0; i < nr_pages; i++) -+ __free_page(mm->context.ldt_pages[i]); -+ mm->context.size = 0; - } - - static int read_ldt(void __user * ptr, unsigned long bytecount) - { -- int err; -+ int err, i; - unsigned long size; - struct mm_struct * mm = current->mm; - -@@ -138,8 +135,25 @@ static int read_ldt(void __user * ptr, u - size = bytecount; - - err = 0; -- if (copy_to_user(ptr, mm->context.ldt, size)) -- err = -EFAULT; -+ /* -+ * This is necessary just in case we got here straight from a -+ * context-switch where the ptes were set but no tlb flush -+ * was done yet. We rather avoid doing a TLB flush in the -+ * context-switch path and do it here instead. 
-+ */ -+ __flush_tlb_global(); -+ -+ for (i = 0; i < size; i += PAGE_SIZE) { -+ int nr = i / PAGE_SIZE, bytes; -+ char *kaddr = kmap(mm->context.ldt_pages[nr]); -+ -+ bytes = size - i; -+ if (bytes > PAGE_SIZE) -+ bytes = PAGE_SIZE; -+ if (copy_to_user(ptr + i, kaddr, size - i)) -+ err = -EFAULT; -+ kunmap(mm->context.ldt_pages[nr]); -+ } - up(&mm->context.sem); - if (err < 0) - return err; -@@ -158,7 +172,7 @@ static int read_default_ldt(void __user - - err = 0; - address = &default_ldt[0]; -- size = 5*sizeof(struct desc_struct); -+ size = 5*LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - -@@ -200,7 +214,15 @@ static int write_ldt(void __user * ptr, - goto out_unlock; - } - -- lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); -+ /* -+ * No rescheduling allowed from this point to the install. -+ * -+ * We do a TLB flush for the same reason as in the read_ldt() path. -+ */ -+ preempt_disable(); -+ __flush_tlb_global(); -+ lp = (__u32 *) ((ldt_info.entry_number << 3) + -+ (char *) __kmap_atomic_vaddr(KM_LDT_PAGE0)); - - /* Allow LDTs to be cleared by the user. 
*/ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { -@@ -221,6 +243,7 @@ install: - *lp = entry_1; - *(lp+1) = entry_2; - error = 0; -+ preempt_enable(); - - out_unlock: - up(&mm->context.sem); -@@ -248,3 +271,26 @@ asmlinkage int sys_modify_ldt(int func, - } - return ret; - } -+ -+/* -+ * load one particular LDT into the current CPU -+ */ -+void load_LDT_nolock(mm_context_t *pc, int cpu) -+{ -+ struct page **pages = pc->ldt_pages; -+ int count = pc->size; -+ int nr_pages, i; -+ -+ if (likely(!count)) { -+ pages = &default_ldt_page; -+ count = 5; -+ } -+ nr_pages = (count*LDT_ENTRY_SIZE + PAGE_SIZE-1) / PAGE_SIZE; -+ -+ for (i = 0; i < nr_pages; i++) { -+ __kunmap_atomic_type(KM_LDT_PAGE0 - i); -+ __kmap_atomic(pages[i], KM_LDT_PAGE0 - i); -+ } -+ set_ldt_desc(cpu, (void *)__kmap_atomic_vaddr(KM_LDT_PAGE0), count); -+ load_LDT_desc(); -+} ---- linux-2.6.0/arch/i386/kernel/Makefile 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/Makefile 2003-12-28 23:26:36.000000000 -0800 -@@ -7,13 +7,14 @@ extra-y := head.o init_task.o vmlinux.ld - obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ - ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \ - pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ -- doublefault.o -+ doublefault.o entry_trampoline.o - - obj-y += cpu/ - obj-y += timers/ - obj-$(CONFIG_ACPI_BOOT) += acpi/ - obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o - obj-$(CONFIG_MCA) += mca.o -+obj-$(CONFIG_KGDB) += kgdb_stub.o - obj-$(CONFIG_X86_MSR) += msr.o - obj-$(CONFIG_X86_CPUID) += cpuid.o - obj-$(CONFIG_MICROCODE) += microcode.o -@@ -24,12 +25,13 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o - obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o - obj-$(CONFIG_X86_IO_APIC) += io_apic.o - obj-$(CONFIG_X86_NUMAQ) += numaq.o --obj-$(CONFIG_X86_SUMMIT) += summit.o -+obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o - obj-$(CONFIG_EDD) += edd.o - obj-$(CONFIG_MODULES) += module.o - obj-y += sysenter.o vsyscall.o - obj-$(CONFIG_ACPI_SRAT) += 
srat.o - obj-$(CONFIG_HPET_TIMER) += time_hpet.o -+obj-$(CONFIG_EFI) += efi.o efi_stub.o - - EXTRA_AFLAGS := -traditional - ---- linux-2.6.0/arch/i386/kernel/mpparse.c 2003-11-23 19:03:00.000000000 -0800 -+++ 25/arch/i386/kernel/mpparse.c 2003-12-28 23:26:36.000000000 -0800 -@@ -668,7 +668,7 @@ void __init get_smp_config (void) - * Read the physical hardware table. Anything here will - * override the defaults. - */ -- if (!smp_read_mpc((void *)mpf->mpf_physptr)) { -+ if (!smp_read_mpc((void *)phys_to_virt(mpf->mpf_physptr))) { - smp_found_config = 0; - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); - printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); -@@ -962,7 +962,8 @@ void __init mp_override_legacy_irq ( - */ - for (i = 0; i < mp_irq_entries; i++) { - if ((mp_irqs[i].mpc_dstapic == intsrc.mpc_dstapic) -- && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq)) { -+ && (mp_irqs[i].mpc_srcbusirq == intsrc.mpc_srcbusirq) -+ && (mp_irqs[i].mpc_irqtype == intsrc.mpc_irqtype)) { - mp_irqs[i] = intsrc; - found = 1; - break; -@@ -1081,8 +1082,14 @@ found: - - ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start; - -+ /* -+ * MPS INTI flags: -+ * trigger: 0=default, 1=edge, 3=level -+ * polarity: 0=default, 1=high, 3=low -+ * Per ACPI spec, default for SCI means level/low. -+ */ - io_apic_set_pci_routing(ioapic, ioapic_pin, irq, -- (flags.trigger >> 1) , (flags.polarity >> 1)); -+ (flags.trigger == 1 ? 0 : 1), (flags.polarity == 1 ? 0 : 1)); - } - - #ifdef CONFIG_ACPI_PCI -@@ -1129,8 +1136,11 @@ void __init mp_parse_prt (void) - continue; - ioapic_pin = irq - mp_ioapic_routing[ioapic].irq_start; - -- if (!ioapic && (irq < 16)) -- irq += 16; -+ if (es7000_plat) { -+ if (!ioapic && (irq < 16)) -+ irq += 16; -+ } -+ - /* - * Avoid pin reprogramming. 
PRTs typically include entries - * with redundant pin->irq mappings (but unique PCI devices); -@@ -1147,21 +1157,29 @@ void __init mp_parse_prt (void) - if ((1<irq = irq; -+ if (use_pci_vector() && !platform_legacy_irq(irq)) -+ irq = IO_APIC_VECTOR(irq); -+ entry->irq = irq; - continue; - } - - mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<irq = irq; -- -+ if (!io_apic_set_pci_routing(ioapic, ioapic_pin, irq, edge_level, active_high_low)) { -+ if (use_pci_vector() && !platform_legacy_irq(irq)) -+ irq = IO_APIC_VECTOR(irq); -+ entry->irq = irq; -+ } - printk(KERN_DEBUG "%02x:%02x:%02x[%c] -> %d-%d -> IRQ %d\n", - entry->id.segment, entry->id.bus, - entry->id.device, ('A' + entry->pin), - mp_ioapic_routing[ioapic].apic_id, ioapic_pin, - entry->irq); - } -+ -+ print_IO_APIC(); -+ -+ return; - } - - #endif /*CONFIG_ACPI_PCI*/ ---- linux-2.6.0/arch/i386/kernel/nmi.c 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/nmi.c 2003-12-28 23:21:06.000000000 -0800 -@@ -31,7 +31,16 @@ - #include - #include - -+#ifdef CONFIG_KGDB -+#include -+#ifdef CONFIG_SMP -+unsigned int nmi_watchdog = NMI_IO_APIC; -+#else -+unsigned int nmi_watchdog = NMI_LOCAL_APIC; -+#endif -+#else - unsigned int nmi_watchdog = NMI_NONE; -+#endif - static unsigned int nmi_hz = HZ; - unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ - extern void show_registers(struct pt_regs *regs); -@@ -408,6 +417,9 @@ void touch_nmi_watchdog (void) - for (i = 0; i < NR_CPUS; i++) - alert_counter[i] = 0; - } -+#ifdef CONFIG_KGDB -+int tune_watchdog = 5*HZ; -+#endif - - void nmi_watchdog_tick (struct pt_regs * regs) - { -@@ -421,12 +433,24 @@ void nmi_watchdog_tick (struct pt_regs * - - sum = irq_stat[cpu].apic_timer_irqs; - -+#ifdef CONFIG_KGDB -+ if (! in_kgdb(regs) && last_irq_sums[cpu] == sum ) { -+ -+#else - if (last_irq_sums[cpu] == sum) { -+#endif - /* - * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... 
- */ - alert_counter[cpu]++; -+#ifdef CONFIG_KGDB -+ if (alert_counter[cpu] == tune_watchdog) { -+ kgdb_handle_exception(2, SIGPWR, 0, regs); -+ last_irq_sums[cpu] = sum; -+ alert_counter[cpu] = 0; -+ } -+#endif - if (alert_counter[cpu] == 5*nmi_hz) { - spin_lock(&nmi_print_lock); - /* ---- linux-2.6.0/arch/i386/kernel/process.c 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/process.c 2003-12-28 23:26:36.000000000 -0800 -@@ -47,6 +47,7 @@ - #include - #include - #include -+#include - #ifdef CONFIG_MATH_EMULATION - #include - #endif -@@ -302,6 +303,9 @@ void flush_thread(void) - struct task_struct *tsk = current; - - memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); -+#ifdef CONFIG_X86_HIGH_ENTRY -+ clear_thread_flag(TIF_DB7); -+#endif - memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - /* - * Forget coprocessor state.. -@@ -315,9 +319,8 @@ void release_thread(struct task_struct * - if (dead_task->mm) { - // temporary debugging check - if (dead_task->mm->context.size) { -- printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", -+ printk("WARNING: dead process %8s still has LDT? <%d>\n", - dead_task->comm, -- dead_task->mm->context.ldt, - dead_task->mm->context.size); - BUG(); - } -@@ -352,7 +355,17 @@ int copy_thread(int nr, unsigned long cl - p->thread.esp = (unsigned long) childregs; - p->thread.esp0 = (unsigned long) (childregs+1); - -+ /* -+ * get the two stack pages, for the virtual stack. -+ * -+ * IMPORTANT: this code relies on the fact that the task -+ * structure is an 8K aligned piece of physical memory. 
-+ */ -+ p->thread.stack_page0 = virt_to_page((unsigned long)p->thread_info); -+ p->thread.stack_page1 = virt_to_page((unsigned long)p->thread_info + PAGE_SIZE); -+ - p->thread.eip = (unsigned long) ret_from_fork; -+ p->thread_info->real_stack = p->thread_info; - - savesegment(fs,p->thread.fs); - savesegment(gs,p->thread.gs); -@@ -504,10 +517,41 @@ struct task_struct * __switch_to(struct - - __unlazy_fpu(prev_p); - -+#ifdef CONFIG_X86_HIGH_ENTRY -+ /* -+ * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is -+ * needed because otherwise NMIs could interrupt the -+ * user-return code with a virtual stack and stale TLBs.) -+ */ -+ __kunmap_atomic_type(KM_VSTACK0); -+ __kunmap_atomic_type(KM_VSTACK1); -+ __kmap_atomic(next->stack_page0, KM_VSTACK0); -+ __kmap_atomic(next->stack_page1, KM_VSTACK1); -+ -+ /* -+ * NOTE: here we rely on the task being the stack as well -+ */ -+ next_p->thread_info->virtual_stack = -+ (void *)__kmap_atomic_vaddr(KM_VSTACK0); -+ -+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) -+ /* -+ * If next was preempted on entry from userspace to kernel, -+ * and now it's on a different cpu, we need to adjust %esp. -+ * This assumes that entry.S does not copy %esp while on the -+ * virtual stack (with interrupts enabled): which is so, -+ * except within __SWITCH_KERNELSPACE itself. -+ */ -+ if (unlikely(next->esp >= TASK_SIZE)) { -+ next->esp &= THREAD_SIZE - 1; -+ next->esp |= (unsigned long) next_p->thread_info->virtual_stack; -+ } -+#endif -+#endif - /* - * Reload esp0, LDT and the page table pointer: - */ -- load_esp0(tss, next->esp0); -+ load_virtual_esp0(tss, next_p); - - /* - * Load the per-thread Thread-Local Storage descriptor. 
---- linux-2.6.0/arch/i386/kernel/reboot.c 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/reboot.c 2003-12-28 23:26:36.000000000 -0800 -@@ -8,6 +8,7 @@ - #include - #include - #include -+#include - #include - #include - #include "mach_reboot.h" -@@ -154,12 +155,11 @@ void machine_real_restart(unsigned char - CMOS_WRITE(0x00, 0x8f); - spin_unlock_irqrestore(&rtc_lock, flags); - -- /* Remap the kernel at virtual address zero, as well as offset zero -- from the kernel segment. This assumes the kernel segment starts at -- virtual address PAGE_OFFSET. */ -- -- memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, -- sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); -+ /* -+ * Remap the first 16 MB of RAM (which includes the kernel image) -+ * at virtual address zero: -+ */ -+ setup_identity_mappings(swapper_pg_dir, 0, 16*1024*1024); - - /* - * Use `swapper_pg_dir' as our page directory. -@@ -263,7 +263,12 @@ void machine_restart(char * __unused) - disable_IO_APIC(); - #endif - -- if(!reboot_thru_bios) { -+ if (!reboot_thru_bios) { -+ if (efi_enabled) { -+ efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, 0); -+ __asm__ __volatile__("lidt %0": :"m" (no_idt)); -+ __asm__ __volatile__("int3"); -+ } - /* rebooting needs to touch the page at absolute addr 0 */ - *((unsigned short *)__va(0x472)) = reboot_mode; - for (;;) { -@@ -273,6 +278,8 @@ void machine_restart(char * __unused) - __asm__ __volatile__("int3"); - } - } -+ if (efi_enabled) -+ efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, 0); - - machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); - } -@@ -287,6 +294,8 @@ EXPORT_SYMBOL(machine_halt); - - void machine_power_off(void) - { -+ if (efi_enabled) -+ efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, 0); - if (pm_power_off) - pm_power_off(); - } ---- linux-2.6.0/arch/i386/kernel/setup.c 2003-11-09 16:45:04.000000000 -0800 -+++ 25/arch/i386/kernel/setup.c 2003-12-28 23:21:45.000000000 -0800 -@@ -36,6 +36,8 @@ - #include - #include - #include 
-+#include -+#include - #include