From f6ac49a6659498df5c384c40e5bf90591f2105d1 Mon Sep 17 00:00:00 2001 From: pschwan Date: Mon, 19 May 2003 03:15:41 +0000 Subject: [PATCH] land b_proto on b_devel: - major rework of the rpc and bulk APIs, lots of new assertions and cleanups - many rpc and bulk bugfixes, including bugs 937, 1057, 1072, 1162 - perform brw and getattr operations to multiple OSTs in parallel - all bulks are tied to exactly one request; multiple requests can be tied to a single request_set --- lustre/kernel_patches/patches/iopen-2.4.18.patch | 405 +++++++++++++++++++++ .../patches/{iopen.patch => iopen-2.4.20.patch} | 0 .../pc/{iopen.pc => iopen-2.4.18.pc} | 0 lustre/kernel_patches/pc/iopen-2.4.20.pc | 8 + lustre/liblustre/file.c | 2 + lustre/liblustre/rw.c | 14 +- lustre/ptlrpc/ptlrpc_module.c | 21 +- 7 files changed, 425 insertions(+), 25 deletions(-) create mode 100644 lustre/kernel_patches/patches/iopen-2.4.18.patch rename lustre/kernel_patches/patches/{iopen.patch => iopen-2.4.20.patch} (100%) rename lustre/kernel_patches/pc/{iopen.pc => iopen-2.4.18.pc} (100%) create mode 100644 lustre/kernel_patches/pc/iopen-2.4.20.pc diff --git a/lustre/kernel_patches/patches/iopen-2.4.18.patch b/lustre/kernel_patches/patches/iopen-2.4.18.patch new file mode 100644 index 0000000..ea5ebb6 --- /dev/null +++ b/lustre/kernel_patches/patches/iopen-2.4.18.patch @@ -0,0 +1,405 @@ + Documentation/filesystems/ext2.txt | 16 ++ + fs/ext3/Makefile | 2 + fs/ext3/inode.c | 4 + fs/ext3/iopen.c | 240 +++++++++++++++++++++++++++++++++++++ + fs/ext3/iopen.h | 15 ++ + fs/ext3/namei.c | 12 + + fs/ext3/super.c | 11 + + include/linux/ext3_fs.h | 2 + 8 files changed, 301 insertions(+), 1 deletion(-) + +--- linux-2.4.18-18.8.0-l18/Documentation/filesystems/ext2.txt~iopen-2.4.18 Wed Jul 11 18:44:45 2001 ++++ linux-2.4.18-18.8.0-l18-phil/Documentation/filesystems/ext2.txt Sun May 18 13:31:09 2003 +@@ -35,6 +35,22 @@ resgid=n The group ID which may use th + + sb=n Use alternate superblock at this location. 
+ ++iopen Makes an invisible pseudo-directory called ++ __iopen__ available in the root directory ++ of the filesystem. Allows open-by-inode- ++ number. i.e., inode 3145 can be accessed ++ via /mntpt/__iopen__/3145 ++ ++iopen_nopriv This option makes the iopen directory be ++ world-readable. This may be safer since it ++ allows daemons to run as an unprivileged user, ++ however it significantly changes the security ++ model of a Unix filesystem, since previously ++ all files under a mode 700 directory were not ++ generally available even if the ++ permissions on the file itself are ++ world-readable. ++ + grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2. + + +--- linux-2.4.18-18.8.0-l18/fs/ext3/Makefile~iopen-2.4.18 Sun May 18 12:58:10 2003 ++++ linux-2.4.18-18.8.0-l18-phil/fs/ext3/Makefile Sun May 18 13:34:51 2003 +@@ -11,7 +11,7 @@ O_TARGET := ext3.o + + export-objs := super.o inode.o xattr.o + +-obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ++obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ + ioctl.o namei.o super.o symlink.o xattr.o + obj-m := $(O_TARGET) + +--- linux-2.4.18-18.8.0-l18/fs/ext3/inode.c~iopen-2.4.18 Sun May 18 13:24:49 2003 ++++ linux-2.4.18-18.8.0-l18-phil/fs/ext3/inode.c Sun May 18 13:31:09 2003 +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include "iopen.h" + + /* + * SEARCH_FROM_ZERO forces each block allocation to search from the start +@@ -2135,6 +2136,9 @@ void ext3_read_inode(struct inode * inod + struct buffer_head *bh; + int block; + ++ if (ext3_iopen_get_inode(inode)) ++ return; ++ + if(ext3_get_inode_loc(inode, &iloc)) + goto bad_inode; + bh = iloc.bh; +--- /dev/null Thu Apr 11 10:25:15 2002 ++++ linux-2.4.18-18.8.0-l18-phil/fs/ext3/iopen.c Sun May 18 13:31:09 2003 +@@ -0,0 +1,240 @@ ++ ++ ++/* ++ * linux/fs/ext3/iopen.c ++ * ++ * Special support for open by inode number ++ * ++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). 
++ * ++ * This file may be redistributed under the terms of the GNU General ++ * Public License. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "iopen.h" ++ ++#ifndef assert ++#define assert(test) J_ASSERT(test) ++#endif ++ ++#define IOPEN_NAME_LEN 32 ++ ++/* ++ * This implements looking up an inode by number. ++ */ ++static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry) ++{ ++ struct inode * inode; ++ unsigned long ino; ++ struct list_head *lp; ++ struct dentry *alternate; ++ char buf[IOPEN_NAME_LEN]; ++ ++ if (dentry->d_name.len >= IOPEN_NAME_LEN) ++ return ERR_PTR(-ENAMETOOLONG); ++ ++ memcpy(buf, dentry->d_name.name, dentry->d_name.len); ++ buf[dentry->d_name.len] = 0; ++ ++ if (strcmp(buf, ".") == 0) ++ ino = dir->i_ino; ++ else if (strcmp(buf, "..") == 0) ++ ino = EXT3_ROOT_INO; ++ else ++ ino = simple_strtoul(buf, 0, 0); ++ ++ if ((ino != EXT3_ROOT_INO && ++ //ino != EXT3_ACL_IDX_INO && ++ //ino != EXT3_ACL_DATA_INO && ++ ino < EXT3_FIRST_INO(dir->i_sb)) || ++ ino > le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count)) ++ return ERR_PTR(-ENOENT); ++ ++ inode = iget(dir->i_sb, ino); ++ if (!inode) ++ return ERR_PTR(-EACCES); ++ if (is_bad_inode(inode)) { ++ iput(inode); ++ return ERR_PTR(-ENOENT); ++ } ++ ++ /* preferably return a connected dentry */ ++ spin_lock(&dcache_lock); ++ list_for_each(lp, &inode->i_dentry) { ++ alternate = list_entry(lp, struct dentry, d_alias); ++ assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED)); ++ } ++ ++ if (!list_empty(&inode->i_dentry)) { ++ alternate = list_entry(inode->i_dentry.next, ++ struct dentry, d_alias); ++ dget_locked(alternate); ++ alternate->d_vfs_flags |= DCACHE_REFERENCED; ++ iput(inode); ++ spin_unlock(&dcache_lock); ++ return alternate; ++ } ++ dentry->d_flags |= DCACHE_NFSD_DISCONNECTED; ++ spin_unlock(&dcache_lock); ++ ++ d_add(dentry, inode); ++ return NULL; ++} ++ ++#define do_switch(x,y) do { \ ++ __typeof__ (x) __tmp = x; 
\ ++ x = y; y = __tmp; } while (0) ++ ++static inline void switch_names(struct dentry * dentry, struct dentry * target) ++{ ++ const unsigned char *old_name, *new_name; ++ ++ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); ++ old_name = target->d_name.name; ++ new_name = dentry->d_name.name; ++ if (old_name == target->d_iname) ++ old_name = dentry->d_iname; ++ if (new_name == dentry->d_iname) ++ new_name = target->d_iname; ++ target->d_name.name = new_name; ++ dentry->d_name.name = old_name; ++} ++ ++ ++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode) ++{ ++ struct dentry *tmp, *goal = NULL; ++ struct list_head *lp; ++ ++ /* preferably return a connected dentry */ ++ spin_lock(&dcache_lock); ++ /* verify this dentry is really new */ ++ assert(!de->d_inode); ++ assert(list_empty(&de->d_subdirs)); ++ assert(list_empty(&de->d_alias)); ++ ++ ++ list_for_each(lp, &inode->i_dentry) { ++ tmp = list_entry(lp, struct dentry, d_alias); ++ if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) { ++ assert(tmp->d_alias.next == &inode->i_dentry); ++ assert(tmp->d_alias.prev == &inode->i_dentry); ++ goal = tmp; ++ dget_locked(goal); ++ break; ++ } ++ } ++ ++ if (!goal) { ++ spin_unlock(&dcache_lock); ++ return NULL; ++ } ++ ++ /* Move the goal to the de hash queue */ ++ goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED; ++ list_del(&goal->d_hash); ++ list_add(&goal->d_hash, &de->d_hash); ++ ++ list_del(&goal->d_child); ++ list_del(&de->d_child); ++ ++ /* Switch the parents and the names.. */ ++ switch_names(goal, de); ++ do_switch(goal->d_parent, de->d_parent); ++ do_switch(goal->d_name.len, de->d_name.len); ++ do_switch(goal->d_name.hash, de->d_name.hash); ++ ++ /* And add them back to the (new) parent lists */ ++ list_add(&goal->d_child, &goal->d_parent->d_subdirs); ++ list_add(&de->d_child, &de->d_parent->d_subdirs); ++ ++ spin_unlock(&dcache_lock); ++ return goal; ++} ++ ++/* ++ * These are the special structures for the iopen pseudo directory. 
++ */ ++ ++static struct inode_operations iopen_inode_operations = { ++ lookup: iopen_lookup, /* BKL held */ ++}; ++ ++static struct file_operations iopen_file_operations = { ++ read: generic_read_dir, ++}; ++ ++static int match_dentry(struct dentry *dentry, const char *name) ++{ ++ int len; ++ ++ len = strlen(name); ++ if (dentry->d_name.len != len) ++ return 0; ++ if (strncmp(dentry->d_name.name, name, len)) ++ return 0; ++ return 1; ++} ++ ++/* ++ * This function is spliced into ext3_lookup and returns 1 if the file ++ * name is __iopen__ and dentry has been filled in appropriately. ++ */ ++int ext3_check_for_iopen(struct inode * dir, struct dentry *dentry) ++{ ++ struct inode * inode; ++ ++ if (dir->i_ino != EXT3_ROOT_INO || ++ !test_opt(dir->i_sb, IOPEN) || ++ !match_dentry(dentry, "__iopen__")) ++ return 0; ++ ++ inode = iget(dir->i_sb, EXT3_BAD_INO); ++ ++ if (!inode) ++ return 0; ++ d_add(dentry, inode); ++ return 1; ++} ++ ++/* ++ * This function is spliced into read_inode; it returns 1 if inode ++ * number is the one for /__iopen__, in which case the inode is filled ++ * in appropriately. Otherwise, this function returns 0. 
++ */ ++int ext3_iopen_get_inode(struct inode * inode) ++{ ++ if (inode->i_ino != EXT3_BAD_INO) ++ return 0; ++ ++ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; ++ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) ++ inode->i_mode |= 0777; ++ inode->i_uid = 0; ++ inode->i_gid = 0; ++ inode->i_nlink = 1; ++ inode->i_size = 4096; ++ inode->i_atime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME; ++ inode->i_mtime = CURRENT_TIME; ++ inode->u.ext3_i.i_dtime = 0; ++ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size ++ * (for stat), not the fs block ++ * size */ ++ inode->i_blocks = 0; ++ inode->i_version = 1; ++ inode->i_generation = 0; ++ ++ inode->i_op = &iopen_inode_operations; ++ inode->i_fop = &iopen_file_operations; ++ inode->i_mapping->a_ops = 0; ++ ++ return 1; ++} +--- /dev/null Thu Apr 11 10:25:15 2002 ++++ linux-2.4.18-18.8.0-l18-phil/fs/ext3/iopen.h Sun May 18 13:31:09 2003 +@@ -0,0 +1,15 @@ ++/* ++ * iopen.h ++ * ++ * Special support for opening files by inode number. ++ * ++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). ++ * ++ * This file may be redistributed under the terms of the GNU General ++ * Public License. ++ */ ++ ++extern int ext3_check_for_iopen(struct inode * dir, struct dentry *dentry); ++extern int ext3_iopen_get_inode(struct inode * inode); ++ ++ +--- linux-2.4.18-18.8.0-l18/fs/ext3/namei.c~iopen-2.4.18 Sun May 18 12:58:14 2003 ++++ linux-2.4.18-18.8.0-l18-phil/fs/ext3/namei.c Sun May 18 13:34:35 2003 +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include "iopen.h" + + /* + * define how far ahead to read directories while searching them. 
+@@ -703,16 +704,21 @@ cleanup_and_exit: + brelse (bh_use[ra_ptr]); + return ret; + } ++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode); + + static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) + { + struct inode * inode; + struct ext3_dir_entry_2 * de; + struct buffer_head * bh; ++ struct dentry *alternate = NULL; + + if (dentry->d_name.len > EXT3_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + ++ if (ext3_check_for_iopen(dir, dentry)) ++ return NULL; ++ + bh = ext3_find_entry(dentry, &de); + inode = NULL; + if (bh) { +@@ -723,6 +729,12 @@ static struct dentry *ext3_lookup(struct + if (!inode) + return ERR_PTR(-EACCES); + } ++ ++ if (inode && (alternate = iopen_connect_dentry(dentry, inode))) { ++ iput(inode); ++ return alternate; ++ } ++ + d_add(dentry, inode); + return NULL; + } +--- linux-2.4.18-18.8.0-l18/fs/ext3/super.c~iopen-2.4.18 Sun May 18 13:24:55 2003 ++++ linux-2.4.18-18.8.0-l18-phil/fs/ext3/super.c Sun May 18 13:31:09 2003 +@@ -602,6 +602,17 @@ static int parse_options (char * options + || !strcmp (this_char, "quota") + || !strcmp (this_char, "usrquota")) + /* Don't do anything ;-) */ ; ++ else if (!strcmp (this_char, "iopen")) { ++ set_opt (sbi->s_mount_opt, IOPEN); ++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ } else if (!strcmp (this_char, "noiopen")) { ++ clear_opt (sbi->s_mount_opt, IOPEN); ++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ } ++ else if (!strcmp (this_char, "iopen_nopriv")) { ++ set_opt (sbi->s_mount_opt, IOPEN); ++ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ } + else if (!strcmp (this_char, "journal")) { + /* @@@ FIXME */ + /* Eventually we will want to be able to create +--- linux-2.4.18-18.8.0-l18/include/linux/ext3_fs.h~iopen-2.4.18 Sun May 18 13:31:05 2003 ++++ linux-2.4.18-18.8.0-l18-phil/include/linux/ext3_fs.h Sun May 18 13:33:03 2003 +@@ -320,6 +320,8 @@ struct ext3_inode { + #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ + #define 
EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ + #define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */ ++#define EXT3_MOUNT_IOPEN 0x8000 /* Allow access via iopen */ ++#define EXT3_MOUNT_IOPEN_NOPRIV 0x10000 /* Make iopen world-readable */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef _LINUX_EXT2_FS_H + +_ diff --git a/lustre/kernel_patches/patches/iopen.patch b/lustre/kernel_patches/patches/iopen-2.4.20.patch similarity index 100% rename from lustre/kernel_patches/patches/iopen.patch rename to lustre/kernel_patches/patches/iopen-2.4.20.patch diff --git a/lustre/kernel_patches/pc/iopen.pc b/lustre/kernel_patches/pc/iopen-2.4.18.pc similarity index 100% rename from lustre/kernel_patches/pc/iopen.pc rename to lustre/kernel_patches/pc/iopen-2.4.18.pc diff --git a/lustre/kernel_patches/pc/iopen-2.4.20.pc b/lustre/kernel_patches/pc/iopen-2.4.20.pc new file mode 100644 index 0000000..b40b1f3 --- /dev/null +++ b/lustre/kernel_patches/pc/iopen-2.4.20.pc @@ -0,0 +1,8 @@ +Documentation/filesystems/ext2.txt +fs/ext3/Makefile +fs/ext3/inode.c +fs/ext3/iopen.c +fs/ext3/iopen.h +fs/ext3/namei.c +fs/ext3/super.c +include/linux/ext3_fs.h diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index d656918..8344af5 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -407,7 +407,9 @@ static int llu_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode) /* We held on to the request for replay until we saw a close for that * file. Now that we've closed it, it gets replayed on the basis of * its transno only. 
*/ + spin_lock (&fd->fd_mds_och.och_req->rq_lock); fd->fd_mds_och.och_req->rq_replay = 0; + spin_unlock (&fd->fd_mds_och.och_req->rq_lock); if (fd->fd_mds_och.och_req->rq_transno) { /* This open created a file, so it needs replay as a diff --git a/lustre/liblustre/rw.c b/lustre/liblustre/rw.c index 8562b69..847b1d0 100644 --- a/lustre/liblustre/rw.c +++ b/lustre/liblustre/rw.c @@ -116,15 +116,10 @@ static int llu_brw(int cmd, struct inode *inode, struct page *page, int flags) { struct llu_inode_info *lli = llu_i2info(inode); struct lov_stripe_md *lsm = lli->lli_smd; - struct obd_brw_set *set; struct brw_page pg; int rc; ENTRY; - set = obd_brw_set_new(); - if (set == NULL) - RETURN(-ENOMEM); - pg.pg = page; pg.off = ((obd_off)page->index) << PAGE_SHIFT; @@ -145,17 +140,10 @@ static int llu_brw(int cmd, struct inode *inode, struct page *page, int flags) pg.flag = flags; - set->brw_callback = ll_brw_sync_wait; rc = obd_brw(cmd, llu_i2obdconn(inode), lsm, 1, &pg, set, NULL); if (rc) { - if (rc != -EIO) - CERROR("error from obd_brw: rc = %d\n", rc); - } else { - rc = ll_brw_sync_wait(set, CB_PHASE_START); - if (rc) - CERROR("error from callback: rc = %d\n", rc); + CERROR("error from obd_brw: rc = %d\n", rc); } - obd_brw_set_decref(set); RETURN(rc); } diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index 8646e72..1a7e15b 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -38,6 +38,7 @@ extern int ptlrpc_init_portals(void); extern void ptlrpc_exit_portals(void); +static int ldlm_hooks_referenced = 0; int (*ptlrpc_ldlm_namespace_cleanup)(struct ldlm_namespace *, int); int (*ptlrpc_ldlm_replay_locks)(struct obd_import *); @@ -78,6 +79,9 @@ if (ptlrpc_##hook) { \ void ptlrpc_put_ldlm_hooks(void) { ENTRY; + if (!ldlm_hooks_referenced) + return; + PUT_HOOK(ldlm_namespace_cleanup); PUT_HOOK(ldlm_replay_locks); ldlm_hooks_referenced = 0; @@ -125,19 +129,14 @@ EXPORT_SYMBOL(ptlrpc_cleanup_connection); /* niobuf.c */ 
EXPORT_SYMBOL(ptlrpc_bulk_put); EXPORT_SYMBOL(ptlrpc_bulk_get); -EXPORT_SYMBOL(ptlrpc_register_bulk_put); -EXPORT_SYMBOL(ptlrpc_register_bulk_get); EXPORT_SYMBOL(ptlrpc_abort_bulk); +EXPORT_SYMBOL(ptlrpc_register_bulk); +EXPORT_SYMBOL(ptlrpc_unregister_bulk); EXPORT_SYMBOL(ptlrpc_reply); EXPORT_SYMBOL(ptlrpc_error); EXPORT_SYMBOL(ptlrpc_resend_req); EXPORT_SYMBOL(ptl_send_rpc); EXPORT_SYMBOL(ptlrpc_link_svc_me); -EXPORT_SYMBOL(obd_brw_set_new); -EXPORT_SYMBOL(obd_brw_set_add); -EXPORT_SYMBOL(obd_brw_set_del); -EXPORT_SYMBOL(obd_brw_set_decref); -EXPORT_SYMBOL(obd_brw_set_addref); /* client.c */ EXPORT_SYMBOL(ptlrpc_init_client); @@ -149,7 +148,7 @@ EXPORT_SYMBOL(ptlrpc_replay_req); EXPORT_SYMBOL(ptlrpc_restart_req); EXPORT_SYMBOL(ptlrpc_prep_req); EXPORT_SYMBOL(ptlrpc_free_req); -EXPORT_SYMBOL(ptlrpc_abort); +EXPORT_SYMBOL(ptlrpc_unregister_reply); EXPORT_SYMBOL(ptlrpc_req_finished); EXPORT_SYMBOL(ptlrpc_request_addref); EXPORT_SYMBOL(ptlrpc_prep_bulk_imp); @@ -157,16 +156,14 @@ EXPORT_SYMBOL(ptlrpc_prep_bulk_exp); EXPORT_SYMBOL(ptlrpc_free_bulk); EXPORT_SYMBOL(ptlrpc_prep_bulk_page); EXPORT_SYMBOL(ptlrpc_free_bulk_page); -EXPORT_SYMBOL(ll_brw_sync_wait); EXPORT_SYMBOL(ptlrpc_abort_inflight); EXPORT_SYMBOL(ptlrpc_retain_replayable_request); EXPORT_SYMBOL(ptlrpc_next_xid); EXPORT_SYMBOL(ptlrpc_prep_set); -EXPORT_SYMBOL(ptlrpc_drop_set); EXPORT_SYMBOL(ptlrpc_set_add_req); -EXPORT_SYMBOL(ptlrpc_req_completed); -EXPORT_SYMBOL(ptlrpc_req_result); +EXPORT_SYMBOL(ptlrpc_set_destroy); +EXPORT_SYMBOL(ptlrpc_set_wait); /* service.c */ EXPORT_SYMBOL(ptlrpc_init_svc); -- 1.8.3.1