From 85c82e09f1bb74bef49c67734591331300138e11 Mon Sep 17 00:00:00 2001 From: phil Date: Mon, 6 Oct 2003 22:31:50 +0000 Subject: [PATCH] Merge most b_llp_hp features and fixes into b_devel: - some functional LDLM reorganization to support posix flocks - new posix flock code added, but not yet enabled in llite/ - kernel patches for NFS export and ext3 raw lookup - NFS export enabled, ext3 raw lookup not enabled - includes all LDLM patches from bug 1766 - locking to fix unlink/create inode re-use recovery race - added /proc tunables for pre-creation variables --- .../patches/nfs_export_kernel-2.4.20.patch | 749 +++++++++++++++++++++ .../kernel_patches/pc/nfs_export_kernel-2.4.20.pc | 9 + lustre/llite/llite_lib.c | 22 +- lustre/llite/llite_nfs.c | 177 +++++ lustre/osc/osc_rpcd.c | 3 +- lustre/tests/createmany-mpi.c | 131 ++++ 6 files changed, 1085 insertions(+), 6 deletions(-) create mode 100644 lustre/kernel_patches/patches/nfs_export_kernel-2.4.20.patch create mode 100644 lustre/kernel_patches/pc/nfs_export_kernel-2.4.20.pc create mode 100644 lustre/llite/llite_nfs.c create mode 100644 lustre/tests/createmany-mpi.c diff --git a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.20.patch b/lustre/kernel_patches/patches/nfs_export_kernel-2.4.20.patch new file mode 100644 index 0000000..1044a87 --- /dev/null +++ b/lustre/kernel_patches/patches/nfs_export_kernel-2.4.20.patch @@ -0,0 +1,749 @@ + fs/Makefile | 3 + fs/file_table.c | 11 ++ + fs/inode.c | 23 ++++- + fs/namei.c | 12 ++ + fs/nfsd/nfsfh.c | 65 +++++++++++++- + fs/nfsd/vfs.c | 240 ++++++++++++++++++++++++++++++++++++++++++++++++----- + fs/super.c | 3 + include/linux/fs.h | 8 + + kernel/ksyms.c | 3 + 9 files changed, 333 insertions(+), 35 deletions(-) + +--- lum-2.4.20-l27/fs/Makefile~nfs_export_kernel-2.4.20 Thu Sep 25 03:31:17 2003 ++++ lum-2.4.20-l27-phil/fs/Makefile Thu Sep 25 03:30:18 2003 +@@ -7,7 +7,8 @@ + + O_TARGET := fs.o + +-export-objs := filesystems.o open.o dcache.o buffer.o inode.o ++export-objs := 
filesystems.o open.o dcache.o buffer.o inode.o namei.o \ ++ file_table.o + mod-subdirs := nls + + obj-y := open.o read_write.o devices.o file_table.o buffer.o \ +--- lum-2.4.20-l27/fs/file_table.c~nfs_export_kernel-2.4.20 Thu Nov 28 18:53:15 2002 ++++ lum-2.4.20-l27-phil/fs/file_table.c Thu Sep 25 03:25:12 2003 +@@ -82,7 +82,8 @@ struct file * get_empty_filp(void) + * and call the open function (if any). The caller must verify that + * inode->i_fop is not NULL. + */ +-int init_private_file(struct file *filp, struct dentry *dentry, int mode) ++int init_private_file_it(struct file *filp, struct dentry *dentry, int mode, ++ struct lookup_intent *it) + { + memset(filp, 0, sizeof(*filp)); + filp->f_mode = mode; +@@ -90,12 +91,20 @@ int init_private_file(struct file *filp, + filp->f_dentry = dentry; + filp->f_uid = current->fsuid; + filp->f_gid = current->fsgid; ++ if (it) ++ filp->f_it = it; + filp->f_op = dentry->d_inode->i_fop; + if (filp->f_op->open) + return filp->f_op->open(dentry->d_inode, filp); + else + return 0; + } ++EXPORT_SYMBOL(init_private_file_it); ++ ++int init_private_file(struct file *filp, struct dentry *dentry, int mode) ++{ ++ return init_private_file_it(filp, dentry, mode, NULL); ++} + + void fput(struct file * file) + { +--- lum-2.4.20-l27/fs/inode.c~nfs_export_kernel-2.4.20 Thu Sep 25 02:30:45 2003 ++++ lum-2.4.20-l27-phil/fs/inode.c Thu Sep 25 02:30:56 2003 +@@ -970,9 +970,10 @@ struct inode *igrab(struct inode *inode) + } + + +-struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque) ++static inline struct inode *ifind(struct super_block *sb, unsigned long ino, ++ struct list_head *head, ++ find_inode_t find_actor, void *opaque) + { +- struct list_head * head = inode_hashtable + hash(sb,ino); + struct inode * inode; + + spin_lock(&inode_lock); +@@ -985,6 +986,24 @@ struct inode *iget4(struct super_block * + } + spin_unlock(&inode_lock); + ++ return NULL; ++} ++ ++struct inode *ilookup4(struct 
super_block *sb, unsigned long ino, ++ find_inode_t find_actor, void *opaque) ++{ ++ struct list_head * head = inode_hashtable + hash(sb,ino); ++ return ifind(sb, ino, head, find_actor, opaque); ++} ++ ++struct inode *iget4(struct super_block *sb, unsigned long ino, ++ find_inode_t find_actor, void *opaque) ++{ ++ struct list_head * head = inode_hashtable + hash(sb,ino); ++ struct inode *inode = ifind(sb, ino, head, find_actor, opaque); ++ if (inode) ++ return inode; ++ + /* + * get_new_inode() will do the right thing, re-trying the search + * in case it had to block at any point. +--- lum-2.4.20-l27/fs/namei.c~nfs_export_kernel-2.4.20 Thu Sep 25 02:30:45 2003 ++++ lum-2.4.20-l27-phil/fs/namei.c Thu Sep 25 03:48:58 2003 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -100,6 +101,7 @@ void intent_release(struct lookup_intent + it->it_op_release(it); + + } ++EXPORT_SYMBOL(intent_release); + + /* In order to reduce some races, while at the same time doing additional + * checking and hopefully speeding things up, we copy filenames to the +@@ -900,7 +902,8 @@ struct dentry * lookup_hash(struct qstr + + + /* SMP-safe */ +-struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) ++struct dentry * lookup_one_len_it(const char * name, struct dentry * base, ++ int len, struct lookup_intent *it) + { + unsigned long hash; + struct qstr this; +@@ -920,11 +923,16 @@ struct dentry * lookup_one_len(const cha + } + this.hash = end_name_hash(hash); + +- return lookup_hash_it(&this, base, NULL); ++ return lookup_hash_it(&this, base, it); + access: + return ERR_PTR(-EACCES); + } + ++struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) ++{ ++ return lookup_one_len_it(name, base, len, NULL); ++} ++ + /* + * namei() + * +--- lum-2.4.20-l27/fs/nfsd/nfsfh.c~nfs_export_kernel-2.4.20 Thu Nov 28 18:53:15 2002 ++++ lum-2.4.20-l27-phil/fs/nfsd/nfsfh.c Thu Sep 25 02:30:56 2003 +@@ -36,6 +36,15 @@ 
struct nfsd_getdents_callback { + int sequence; /* sequence counter */ + }; + ++static struct dentry *lookup_it(struct inode *inode, struct dentry * dentry) ++{ ++ if (inode->i_op->lookup_it) ++ return inode->i_op->lookup_it(inode, dentry, NULL, 0); ++ else ++ return inode->i_op->lookup(inode, dentry); ++ ++} ++ + /* + * A rather strange filldir function to capture + * the name matching the specified inode number. +@@ -75,6 +84,8 @@ static int nfsd_get_name(struct dentry * + int error; + struct file file; + struct nfsd_getdents_callback buffer; ++ struct lookup_intent it; ++ struct file *filp = NULL; + + error = -ENOTDIR; + if (!dir || !S_ISDIR(dir->i_mode)) +@@ -85,9 +96,37 @@ static int nfsd_get_name(struct dentry * + /* + * Open the directory ... + */ +- error = init_private_file(&file, dentry, FMODE_READ); +- if (error) ++ if (dentry->d_op && dentry->d_op->d_revalidate_it) { ++ if ((dentry->d_flags & DCACHE_NFSD_DISCONNECTED) && ++ (dentry->d_parent == dentry) ) { ++ it.it_op_release = NULL; ++ /* ++ * XXX Temporary Hack: Simulating init_private_file without ++ * f_op->open for disconnected dentry Since we don't have actual ++ * dentry->d_name to revalidate in revalidate_it() ++ */ ++ filp = &file; ++ memset(filp, 0, sizeof(*filp)); ++ filp->f_mode = FMODE_READ; ++ atomic_set(&filp->f_count, 1); ++ filp->f_dentry = dentry; ++ filp->f_uid = current->fsuid; ++ filp->f_gid = current->fsgid; ++ filp->f_op = dentry->d_inode->i_fop; ++ error = 0; ++ } else { ++ intent_init(&it, IT_OPEN, 0); ++ error = revalidate_it(dentry, &it); ++ if (error) ++ goto out; ++ error = init_private_file_it(&file, dentry, FMODE_READ, &it); ++ } ++ } else { ++ error = init_private_file_it(&file, dentry, FMODE_READ, NULL); ++ } ++ if (error) + goto out; ++ + error = -EINVAL; + if (!file.f_op->readdir) + goto out_close; +@@ -113,9 +152,13 @@ static int nfsd_get_name(struct dentry * + } + + out_close: +- if (file.f_op->release) ++ if (file.f_op->release && !filp) + file.f_op->release(dir, 
&file); + out: ++ if (dentry->d_op && ++ dentry->d_op->d_revalidate_it && ++ it.it_op_release && !filp) ++ intent_release(&it); + return error; + } + +@@ -274,7 +317,7 @@ struct dentry *nfsd_findparent(struct de + * it is well connected. But nobody returns different dentrys do they? + */ + down(&child->d_inode->i_sem); +- pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry); ++ pdentry = lookup_it(child->d_inode, tdentry); + up(&child->d_inode->i_sem); + d_drop(tdentry); /* we never want ".." hashed */ + if (!pdentry && tdentry->d_inode == NULL) { +@@ -306,6 +349,8 @@ struct dentry *nfsd_findparent(struct de + igrab(tdentry->d_inode); + pdentry->d_flags |= DCACHE_NFSD_DISCONNECTED; + } ++ if (child->d_op && child->d_op->d_revalidate_it) ++ pdentry->d_op = child->d_op; + } + if (pdentry == NULL) + pdentry = ERR_PTR(-ENOMEM); +@@ -463,6 +508,8 @@ find_fh_dentry(struct super_block *sb, _ + struct dentry *pdentry; + struct inode *parent; + ++ if (result->d_op && result->d_op->d_revalidate_it) ++ dentry->d_op = result->d_op; + pdentry = nfsd_findparent(dentry); + err = PTR_ERR(pdentry); + if (IS_ERR(pdentry)) +@@ -662,6 +709,11 @@ fh_verify(struct svc_rqst *rqstp, struct + + inode = dentry->d_inode; + ++ /* cache coherency for non-device filesystems */ ++ if (inode->i_op && inode->i_op->revalidate_it) { ++ inode->i_op->revalidate_it(dentry, NULL); ++ } ++ + /* Type check. The correct error return for type mismatches + * does not seem to be generally agreed upon. 
SunOS seems to + * use EISDIR if file isn't S_IFREG; a comment in the NFSv3 +@@ -900,8 +952,9 @@ out_negative: + dentry->d_parent->d_name.name, dentry->d_name.name); + goto out; + out_uptodate: +- printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", +- dentry->d_parent->d_name.name, dentry->d_name.name); ++ if(!dentry->d_parent->d_inode->i_op->mkdir_raw) ++ printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", ++ dentry->d_parent->d_name.name, dentry->d_name.name); + goto out; + } + +--- lum-2.4.20-l27/fs/nfsd/vfs.c~nfs_export_kernel-2.4.20 Thu Nov 28 18:53:15 2002 ++++ lum-2.4.20-l27-phil/fs/nfsd/vfs.c Thu Sep 25 03:05:28 2003 +@@ -77,6 +77,128 @@ struct raparms { + static struct raparms * raparml; + static struct raparms * raparm_cache; + ++static int link_raw(struct dentry *dold, struct dentry *ddir, ++ struct dentry *dnew) ++{ ++ int err; ++ ++ struct nameidata old_nd = { .dentry = dold }; ++ struct nameidata nd = { .dentry = ddir, .last = dnew->d_name }; ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ err = op->link_raw(&old_nd, &nd); ++ d_instantiate(dnew, dold->d_inode); ++ if(dold->d_inode->i_op && dold->d_inode->i_op->revalidate_it) ++ dold->d_inode->i_op->revalidate_it(dnew, NULL); ++ ++ return err; ++} ++ ++static int unlink_raw(struct dentry *dentry, char *fname, int flen, ++ struct dentry *rdentry) ++{ ++ int err; ++ struct qstr last = { .name = fname, .len = flen }; ++ struct nameidata nd = { .dentry = dentry, .last = last }; ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ err = op->unlink_raw(&nd); ++ if (!err) ++ d_delete(rdentry); ++ ++ return err; ++} ++ ++static int rmdir_raw(struct dentry *dentry, char *fname, int flen, ++ struct dentry *rdentry) ++{ ++ int err; ++ struct qstr last = { .name = fname, .len = flen }; ++ struct nameidata nd = { .dentry = dentry, .last = last }; ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ err = op->rmdir_raw(&nd); ++ if(!err) { ++ rdentry->d_inode->i_flags |= 
S_DEAD; ++ d_delete(rdentry); ++ } ++ ++ return err; ++} ++ ++static int symlink_raw(struct dentry *dentry, char *fname, int flen, ++ char *path) ++{ ++ int err; ++ struct qstr last = { .name = fname, .len = flen }; ++ struct nameidata nd = { .dentry = dentry, .last = last }; ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ err = op->symlink_raw(&nd, path); ++ ++ return err; ++} ++ ++static int mkdir_raw(struct dentry *dentry, char *fname, int flen, int mode) ++{ ++ int err; ++ struct qstr last = { .name = fname, .len = flen }; ++ struct nameidata nd = { .dentry = dentry, .last = last }; ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ err = op->mkdir_raw(&nd, mode); ++ ++ return err; ++} ++ ++static int mknod_raw(struct dentry *dentry, char *fname, int flen, int mode, ++ dev_t dev) ++{ ++ int err; ++ struct qstr last = { .name = fname, .len = flen }; ++ struct nameidata nd = { .dentry = dentry, .last = last }; ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ err = op->mknod_raw(&nd, mode, dev); ++ ++ return err; ++} ++ ++static int rename_raw(struct dentry *fdentry, struct dentry *tdentry, ++ struct dentry *odentry, struct dentry *ndentry) ++{ ++ int err; ++ ++ struct nameidata old_nd = { .dentry = fdentry, .last = odentry->d_name}; ++ struct nameidata new_nd = { .dentry = tdentry, .last = ndentry->d_name}; ++ struct inode_operations *op = old_nd.dentry->d_inode->i_op; ++ err = op->rename_raw(&old_nd, &new_nd); ++ d_move(odentry, ndentry); ++ ++ return err; ++} ++ ++static int setattr_raw(struct inode *inode, struct iattr *iap) ++{ ++ int err; ++ ++ iap->ia_valid |= ATTR_RAW; ++ err = inode->i_op->setattr_raw(inode, iap); ++ ++ return err; ++} ++ ++int revalidate_it(struct dentry *dentry, struct lookup_intent *it) ++{ ++ int err = 0; ++ ++ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { ++ if (!dentry->d_op->d_revalidate_it(dentry, 0, it) && ++ !d_invalidate(dentry)) { ++ dput(dentry); ++ err = -EINVAL; ++ 
dentry = NULL; ++ return err; ++ } ++ } ++ ++ return err; ++} ++ + /* + * Look up one component of a pathname. + * N.B. After this call _both_ fhp and resfh need an fh_put +@@ -300,7 +422,10 @@ nfsd_setattr(struct svc_rqst *rqstp, str + } + err = nfserr_notsync; + if (!check_guard || guardtime == inode->i_ctime) { +- err = notify_change(dentry, iap); ++ if ( dentry->d_inode->i_op && dentry->d_inode->i_op->setattr_raw) ++ err = setattr_raw(dentry->d_inode, iap); ++ else ++ err = notify_change(dentry, iap); + err = nfserrno(err); + } + if (size_change) { +@@ -427,6 +552,7 @@ nfsd_open(struct svc_rqst *rqstp, struct + { + struct dentry *dentry; + struct inode *inode; ++ struct lookup_intent it; + int err; + + /* If we get here, then the client has already done an "open", and (hopefully) +@@ -473,6 +599,14 @@ nfsd_open(struct svc_rqst *rqstp, struct + filp->f_mode = FMODE_READ; + } + ++ intent_init(&it, IT_OPEN, (filp->f_flags & ~O_ACCMODE) | filp->f_mode); ++ ++ err = revalidate_it(dentry, &it); ++ if (err) ++ goto out_nfserr; ++ ++ filp->f_it = &it; ++ + err = 0; + if (filp->f_op && filp->f_op->open) { + err = filp->f_op->open(inode, filp); +@@ -487,7 +621,11 @@ nfsd_open(struct svc_rqst *rqstp, struct + atomic_dec(&filp->f_count); + } + } ++ + out_nfserr: ++ if (it.it_op_release) ++ intent_release(&it); ++ + if (err) + err = nfserrno(err); + out: +@@ -818,7 +956,7 @@ nfsd_create(struct svc_rqst *rqstp, stru + { + struct dentry *dentry, *dchild; + struct inode *dirp; +- int err; ++ int err, error = -EOPNOTSUPP; + + err = nfserr_perm; + if (!flen) +@@ -834,20 +972,44 @@ nfsd_create(struct svc_rqst *rqstp, stru + dentry = fhp->fh_dentry; + dirp = dentry->d_inode; + ++ switch (type) { ++ case S_IFDIR: ++ if (dirp->i_op->mkdir_raw) ++ error = mkdir_raw(dentry, fname, flen, iap->ia_mode); ++ break; ++ case S_IFCHR: ++ case S_IFBLK: ++ case S_IFIFO: ++ case S_IFSOCK: ++ case S_IFREG: ++ if (dirp->i_op->mknod_raw) { ++ if (type == S_IFREG) ++ rdev = 0; ++ error = 
mknod_raw(dentry, fname, flen, iap->ia_mode, rdev); ++ } ++ break; ++ default: ++ printk("nfsd: bad file type %o in nfsd_create\n", type); ++ } ++ + err = nfserr_notdir; +- if(!dirp->i_op || !dirp->i_op->lookup) ++ if(!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) + goto out; + /* + * Check whether the response file handle has been verified yet. + * If it has, the parent directory should already be locked. + */ +- if (!resfhp->fh_dentry) { +- /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ +- fh_lock(fhp); ++ if (!resfhp->fh_dentry || dirp->i_op->lookup_it) { ++ /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create ++ and nfsd_proc_create in case of lustre ++ */ ++ if (!resfhp->fh_dentry) ++ fh_lock(fhp); + dchild = lookup_one_len(fname, dentry, flen); + err = PTR_ERR(dchild); + if (IS_ERR(dchild)) + goto out_nfserr; ++ resfhp->fh_dentry = NULL; + err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); + if (err) + goto out; +@@ -868,10 +1030,12 @@ nfsd_create(struct svc_rqst *rqstp, stru + * Make sure the child dentry is still negative ... 
+ */ + err = nfserr_exist; +- if (dchild->d_inode) { +- dprintk("nfsd_create: dentry %s/%s not negative!\n", +- dentry->d_name.name, dchild->d_name.name); +- goto out; ++ if ( error == -EOPNOTSUPP) { ++ if (dchild->d_inode) { ++ dprintk("nfsd_create: dentry %s/%s not negative!\n", ++ dentry->d_name.name, dchild->d_name.name); ++ goto out; ++ } + } + + if (!(iap->ia_valid & ATTR_MODE)) +@@ -884,16 +1048,19 @@ nfsd_create(struct svc_rqst *rqstp, stru + err = nfserr_perm; + switch (type) { + case S_IFREG: +- err = vfs_create(dirp, dchild, iap->ia_mode); ++ if (error == -EOPNOTSUPP) ++ err = vfs_create(dirp, dchild, iap->ia_mode); + break; + case S_IFDIR: +- err = vfs_mkdir(dirp, dchild, iap->ia_mode); ++ if (error == -EOPNOTSUPP) ++ err = vfs_mkdir(dirp, dchild, iap->ia_mode); + break; + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: +- err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); ++ if (error == -EOPNOTSUPP) ++ err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); + break; + default: + printk("nfsd: bad file type %o in nfsd_create\n", type); +@@ -962,7 +1129,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, s + /* Get all the sanity checks out of the way before + * we lock the parent. 
*/ + err = nfserr_notdir; +- if(!dirp->i_op || !dirp->i_op->lookup) ++ if (dirp->i_op->mknod_raw) { ++ err = mknod_raw(dentry, fname, flen, iap->ia_mode, 0); ++ if (err && err != -EOPNOTSUPP) ++ goto out; ++ } ++ ++ if(!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it)) + goto out; + fh_lock(fhp); + +@@ -1013,6 +1186,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, s + case NFS3_CREATE_GUARDED: + err = nfserr_exist; + } ++ if(dirp->i_op->mknod_raw) ++ err = 0; + goto out; + } + +@@ -1119,7 +1294,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str + struct iattr *iap) + { + struct dentry *dentry, *dnew; +- int err, cerr; ++ int err, cerr, error = -EOPNOTSUPP; + + err = nfserr_noent; + if (!flen || !plen) +@@ -1133,12 +1308,18 @@ nfsd_symlink(struct svc_rqst *rqstp, str + goto out; + fh_lock(fhp); + dentry = fhp->fh_dentry; ++ ++ if (dentry->d_inode->i_op->symlink_raw) ++ error = symlink_raw(dentry, fname, flen, path); ++ + dnew = lookup_one_len(fname, dentry, flen); + err = PTR_ERR(dnew); + if (IS_ERR(dnew)) + goto out_nfserr; + +- err = vfs_symlink(dentry->d_inode, dnew, path); ++ err = error; ++ if (err == -EOPNOTSUPP || !dentry->d_inode->i_op->symlink_raw) ++ err = vfs_symlink(dentry->d_inode, dnew, path); + if (!err) { + if (EX_ISSYNC(fhp->fh_export)) + nfsd_sync_dir(dentry); +@@ -1148,7 +1329,10 @@ nfsd_symlink(struct svc_rqst *rqstp, str + iap->ia_valid |= ATTR_CTIME; + iap->ia_mode = (iap->ia_mode&S_IALLUGO) + | S_IFLNK; +- err = notify_change(dnew, iap); ++ if (dnew->d_inode->i_op && dnew->d_inode->i_op->setattr_raw) ++ err = setattr_raw(dnew->d_inode, iap); ++ else ++ err = notify_change(dnew, iap); + if (!err && EX_ISSYNC(fhp->fh_export)) + write_inode_now(dentry->d_inode, 1); + } +@@ -1206,7 +1390,10 @@ nfsd_link(struct svc_rqst *rqstp, struct + dold = tfhp->fh_dentry; + dest = dold->d_inode; + +- err = vfs_link(dold, dirp, dnew); ++ if (dirp->i_op->link_raw) ++ err = link_raw(dold, ddir, dnew); ++ else ++ err = vfs_link(dold, dirp, dnew); + if (!err) { + 
if (EX_ISSYNC(ffhp->fh_export)) { + nfsd_sync_dir(ddir); +@@ -1291,7 +1478,10 @@ nfsd_rename(struct svc_rqst *rqstp, stru + err = nfserr_perm; + } else + #endif +- err = vfs_rename(fdir, odentry, tdir, ndentry); ++ if(fdir->i_op->rename_raw) ++ err = rename_raw(fdentry, tdentry, odentry, ndentry); ++ else ++ err = vfs_rename(fdir, odentry, tdir, ndentry); + if (!err && EX_ISSYNC(tfhp->fh_export)) { + nfsd_sync_dir(tdentry); + nfsd_sync_dir(fdentry); +@@ -1312,7 +1502,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru + fill_post_wcc(tfhp); + double_up(&tdir->i_sem, &fdir->i_sem); + ffhp->fh_locked = tfhp->fh_locked = 0; +- ++ + out: + return err; + } +@@ -1358,9 +1548,15 @@ nfsd_unlink(struct svc_rqst *rqstp, stru + err = nfserr_perm; + } else + #endif +- err = vfs_unlink(dirp, rdentry); ++ if (dirp->i_op->unlink_raw) ++ err = unlink_raw(dentry, fname, flen, rdentry); ++ else ++ err = vfs_unlink(dirp, rdentry); + } else { /* It's RMDIR */ +- err = vfs_rmdir(dirp, rdentry); ++ if (dirp->i_op->rmdir_raw) ++ err = rmdir_raw(dentry, fname, flen, rdentry); ++ else ++ err = vfs_rmdir(dirp, rdentry); + } + + dput(rdentry); +--- lum-2.4.20-l27/fs/super.c~nfs_export_kernel-2.4.20 Thu Sep 25 02:30:45 2003 ++++ lum-2.4.20-l27-phil/fs/super.c Thu Sep 25 02:30:56 2003 +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + + #include + +@@ -51,7 +52,7 @@ spinlock_t sb_lock = SPIN_LOCK_UNLOCKED; + */ + + static struct file_system_type *file_systems; +-static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED; ++rwlock_t file_systems_lock = RW_LOCK_UNLOCKED; + + /* WARNING: This can be used only if we _already_ own a reference */ + static void get_filesystem(struct file_system_type *fs) +--- lum-2.4.20-l27/include/linux/fs.h~nfs_export_kernel-2.4.20 Thu Sep 25 02:30:55 2003 ++++ lum-2.4.20-l27-phil/include/linux/fs.h Thu Sep 25 03:25:37 2003 +@@ -1098,6 +1098,9 @@ extern int open_namei_it(const char *fil + struct nameidata *nd, struct lookup_intent *it); + extern struct file 
*dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, + int flags, struct lookup_intent *it); ++extern int revalidate_it(struct dentry *dentry, struct lookup_intent *it); ++extern int init_private_file_it(struct file *, struct dentry *dentry, int mode, ++ struct lookup_intent *it); + extern int filp_close(struct file *, fl_owner_t id); + extern char * getname(const char *); + +@@ -1368,6 +1371,8 @@ extern void path_release(struct nameidat + extern int follow_down(struct vfsmount **, struct dentry **); + extern int follow_up(struct vfsmount **, struct dentry **); + extern struct dentry * lookup_one_len(const char *, struct dentry *, int); ++extern struct dentry * lookup_one_len_it(const char *, struct dentry *, int, ++ struct lookup_intent *); + extern struct dentry * lookup_hash(struct qstr *, struct dentry *); + #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) + #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) +@@ -1381,6 +1386,8 @@ extern ino_t iunique(struct super_block + + typedef int (*find_inode_t)(struct inode *, unsigned long, void *); + extern struct inode * iget4(struct super_block *, unsigned long, find_inode_t, void *); ++extern struct inode * ilookup4(struct super_block *, unsigned long, ++ find_inode_t, void *); + static inline struct inode *iget(struct super_block *sb, unsigned long ino) + { + return iget4(sb, ino, NULL, NULL); +@@ -1496,6 +1503,7 @@ extern int dcache_dir_fsync(struct file + extern int dcache_readdir(struct file *, void *, filldir_t); + extern struct file_operations dcache_dir_ops; + ++extern rwlock_t file_systems_lock; + extern struct file_system_type *get_fs_type(const char *name); + extern struct super_block *get_super(kdev_t); + extern void drop_super(struct super_block *sb); +--- lum-2.4.20-l27/kernel/ksyms.c~nfs_export_kernel-2.4.20 Thu Sep 25 02:30:49 2003 ++++ lum-2.4.20-l27-phil/kernel/ksyms.c Thu Sep 25 02:30:56 2003 +@@ -146,6 +146,7 @@ 
EXPORT_SYMBOL(fget); + EXPORT_SYMBOL(igrab); + EXPORT_SYMBOL(iunique); + EXPORT_SYMBOL(iget4); ++EXPORT_SYMBOL(ilookup4); + EXPORT_SYMBOL(iput); + EXPORT_SYMBOL(force_delete); + EXPORT_SYMBOL(follow_up); +@@ -156,6 +157,7 @@ EXPORT_SYMBOL(path_walk); + EXPORT_SYMBOL(path_release); + EXPORT_SYMBOL(__user_walk); + EXPORT_SYMBOL(lookup_one_len); ++EXPORT_SYMBOL(lookup_one_len_it); + EXPORT_SYMBOL(lookup_hash); + EXPORT_SYMBOL(sys_close); + EXPORT_SYMBOL(dcache_lock); +@@ -590,3 +592,4 @@ EXPORT_SYMBOL(pidhash); + + /* debug */ + EXPORT_SYMBOL(dump_stack); ++EXPORT_SYMBOL(file_systems_lock); + +_ diff --git a/lustre/kernel_patches/pc/nfs_export_kernel-2.4.20.pc b/lustre/kernel_patches/pc/nfs_export_kernel-2.4.20.pc new file mode 100644 index 0000000..622704f --- /dev/null +++ b/lustre/kernel_patches/pc/nfs_export_kernel-2.4.20.pc @@ -0,0 +1,9 @@ +fs/Makefile +fs/file_table.c +fs/inode.c +fs/namei.c +fs/nfsd/nfsfh.c +fs/nfsd/vfs.c +fs/super.c +include/linux/fs.h +kernel/ksyms.c diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index d633069..9b0d77e 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -135,6 +135,7 @@ int ll_fill_super(struct super_block *sb, void *data, int silent) struct lustre_handle mdc_conn = {0, }; struct lustre_md md; class_uuid_t uuid; + kdev_t devno; ENTRY; @@ -192,6 +193,13 @@ int ll_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize_bits = log2(osfs.os_bsize); sb->s_magic = LL_SUPER_MAGIC; sb->s_maxbytes = PAGE_CACHE_MAXBYTES; + + devno = get_uuid2int(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid, + strlen(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid)); + write_lock(&file_systems_lock); + sb->s_type->fs_flags = FS_REQUIRES_DEV; + write_unlock(&file_systems_lock); + sb->s_dev = devno; obd = class_name2obd(osc); if (!obd) { @@ -274,6 +282,10 @@ void ll_put_super(struct super_block *sb) ENTRY; CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb); + write_lock(&file_systems_lock); + 
sb->s_type->fs_flags = 0; + write_unlock(&file_systems_lock); + list_del(&sbi->ll_conn_chain); obd_disconnect(sbi->ll_osc_exp, 0); @@ -312,8 +324,8 @@ struct inode *ll_inode_from_lock(struct ldlm_lock *lock) { struct inode *inode; l_lock(&lock->l_resource->lr_namespace->ns_lock); - if (lock->l_data) - inode = igrab(lock->l_data); + if (lock->l_ast_data) + inode = igrab(lock->l_ast_data); else inode = NULL; l_unlock(&lock->l_resource->lr_namespace->ns_lock); @@ -322,8 +334,8 @@ struct inode *ll_inode_from_lock(struct ldlm_lock *lock) static int null_if_equal(struct ldlm_lock *lock, void *data) { - if (data == lock->l_data) - lock->l_data = NULL; + if (data == lock->l_ast_data) + lock->l_ast_data = NULL; if (lock->l_req_mode != lock->l_granted_mode) return LDLM_ITER_STOP; @@ -904,7 +916,7 @@ int ll_prep_inode(struct obd_export *exp, struct inode **inode, } else { LASSERT(sb); *inode = ll_iget(sb, md.body->ino, &md); - if (!*inode) { + if (*inode == NULL || is_bad_inode(*inode)) { /* free the lsm if we allocated one above */ if (md.lsm != NULL) obd_free_memmd(exp, &md.lsm); diff --git a/lustre/llite/llite_nfs.c b/lustre/llite/llite_nfs.c new file mode 100644 index 0000000..046e959 --- /dev/null +++ b/lustre/llite/llite_nfs.c @@ -0,0 +1,177 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * NFS export of Lustre Light File System + * + * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_LLITE +#include +#include "llite_internal.h" + +__u32 get_uuid2int(const char *name, int len) +{ + __u32 key0 = 0x12a3fe2d, key1 = 0x37abe8f9; + while (len--) { + __u32 key = key1 + (key0 ^ (*name++ * 7152373)); + if (key & 0x80000000) key -= 0x7fffffff; + key1 = key0; + key0 = key; + } + return (key0 << 1); +} + +static struct inode * search_inode_for_lustre(struct super_block *sb, + unsigned long ino, + unsigned long generation, + int mode) +{ + struct ptlrpc_request *req = NULL; + struct ll_sb_info *sbi = ll_s2sbi(sb); + struct ll_fid fid; + unsigned long valid = 0; + int eadatalen = 0, rc; + struct inode *inode = NULL; + + inode = ilookup4(sb, ino, NULL, NULL); + if (inode) + return inode; + if (S_ISREG(mode)) { + eadatalen = obd_size_diskmd(sbi->ll_osc_exp, NULL); + valid |= OBD_MD_FLEASIZE; + } + fid.id = (__u64)ino; + fid.generation = generation; + fid.f_type = mode; + + rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, eadatalen, &req); + if (rc) { + CERROR("failure %d inode %lu\n", rc, ino); + return ERR_PTR(rc); + } + + rc = ll_prep_inode(sbi->ll_osc_exp, &inode, req, 0, sb); + if (rc) { + ptlrpc_req_finished(req); + return ERR_PTR(rc); + } + ptlrpc_req_finished(req); + + return inode; +} + +extern struct dentry_operations ll_d_ops; + +static struct dentry *ll_iget_for_nfs(struct super_block *sb, unsigned long ino, + __u32 generation, umode_t mode) +{ + struct inode *inode; + struct dentry *result; + struct list_head *lp; + + if (ino == 0) + return ERR_PTR(-ESTALE); + + inode = search_inode_for_lustre(sb, ino, generation, mode); + if (IS_ERR(inode)) { + return ERR_PTR(PTR_ERR(inode)); + } + if (is_bad_inode(inode) + || (generation && inode->i_generation != generation) + ){ + /* we didn't find the right inode.. 
*/ + CERROR(" Inode %lu, Bad count: %d %d or version %u %u\n", + inode->i_ino, + inode->i_nlink, + atomic_read(&inode->i_count), + inode->i_generation, + generation); + iput(inode); + return ERR_PTR(-ESTALE); + } + + /* now to find a dentry. + * If possible, get a well-connected one + */ + spin_lock(&dcache_lock); + for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next) { + result = list_entry(lp,struct dentry, d_alias); + if (!(result->d_flags & DCACHE_NFSD_DISCONNECTED)) { + dget_locked(result); + result->d_vfs_flags |= DCACHE_REFERENCED; + spin_unlock(&dcache_lock); + iput(inode); + return result; + } + } + spin_unlock(&dcache_lock); + result = d_alloc_root(inode); + if (result == NULL) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + result->d_flags |= DCACHE_NFSD_DISCONNECTED; + ll_set_dd(result); + result->d_op = &ll_d_ops; + return result; +} + +struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len, + int fhtype, int parent) +{ + switch (fhtype) { + case 2: + if (len < 5) + break; + if (parent) + return ll_iget_for_nfs(sb, data[3], 0, data[4]); + case 1: + if (len < 3) + break; + if (parent) + break; + return ll_iget_for_nfs(sb, data[0], data[1], data[2]); + default: break; + } + return ERR_PTR(-EINVAL); +} + +int ll_dentry_to_fh(struct dentry *dentry, __u32 *datap, int *lenp, + int need_parent) +{ + if (*lenp < 3) + return 255; + *datap++ = dentry->d_inode->i_ino; + *datap++ = dentry->d_inode->i_generation; + *datap++ = (__u32)(S_IFMT & dentry->d_inode->i_mode); + + if (*lenp == 3 || S_ISDIR(dentry->d_inode->i_mode)) { + *lenp = 3; + return 1; + } + if (dentry->d_parent) { + *datap++ = dentry->d_parent->d_inode->i_ino; + *datap++ = (__u32)(S_IFMT & dentry->d_parent->d_inode->i_mode); + + *lenp = 5; + return 2; + } + *lenp = 3; + return 1; +} diff --git a/lustre/osc/osc_rpcd.c b/lustre/osc/osc_rpcd.c index c9b7691..9db5db2 100644 --- a/lustre/osc/osc_rpcd.c +++ b/lustre/osc/osc_rpcd.c @@ -102,11 +102,12 @@ static int 
osc_rpcd_check(struct osc_rpcd_ctl *orc) req = list_entry(pos, struct ptlrpc_request, rq_set_chain); list_del_init(&req->rq_set_chain); ptlrpc_set_add_req(orc->orc_set, req); + rc = 1; /* need to calculate its timeout */ } spin_unlock_irqrestore(&orc->orc_set->set_new_req_lock, flags); if (orc->orc_set->set_remaining) { - rc = ptlrpc_check_set(orc->orc_set); + rc = rc | ptlrpc_check_set(orc->orc_set); /* XXX our set never completes, so we prune the completed * reqs after each iteration. boy could this be smarter. */ diff --git a/lustre/tests/createmany-mpi.c b/lustre/tests/createmany-mpi.c new file mode 100644 index 0000000..1474b7b --- /dev/null +++ b/lustre/tests/createmany-mpi.c @@ -0,0 +1,131 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpi.h" + +void usage(char *prog) +{ + printf("usage: %s {-o|-m} filenamefmt count\n", prog); + printf(" %s {-o|-m} filenamefmt -seconds\n", prog); + printf(" %s {-o|-m} filenamefmt start count\n", prog); +} + +/* Print process rank, loop count, message, and exit (i.e. a fatal error) */ +int rprintf(int rank, int loop, const char *fmt, ...) 
+{ + va_list ap; + + printf("rank %d, loop %d: ", rank, loop); + + va_start(ap, fmt); + /* fmt's arguments arrive as a va_list, so the v-variant is required */ + vprintf(fmt, ap); + va_end(ap); + MPI_Finalize(); + exit(1); +} + +int main(int argc, char ** argv) +{ + int i, rc = 0, do_open, rank; + char format[4096], *fmt; + char filename[4096]; + long start, last, end; + long begin = 0, count; + + rc = MPI_Init(&argc, &argv); + if (rc != MPI_SUCCESS) + rprintf(-1, -1, "MPI_Init failed: %d\n", rc); + + if (argc < 4 || argc > 5) { + usage(argv[0]); + return 1; + } + + if (strcmp(argv[1], "-o") == 0) { + do_open = 1; + } else if (strcmp(argv[1], "-m") == 0) { + do_open = 0; + } else { + usage(argv[0]); + return 1; + } + + if (strlen(argv[2]) > 4080) { + printf("name too long\n"); + return 1; + } + + rc = MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rc != MPI_SUCCESS) + rprintf(-1, -1, "MPI_Comm_rank failed: %d\n", rc); + + rc = MPI_Barrier(MPI_COMM_WORLD); + if (rc != MPI_SUCCESS) + rprintf(rank, -1, "prep MPI_Barrier failed: %d\n", rc); + + start = last = time(0); + + if (argc == 4) { + end = strtol(argv[3], NULL, 0); + if (end > 0) { + count = end; + end = -1UL >> 1; + } else { + end = start - end; + count = -1UL >> 1; + } + } else { + end = -1UL >> 1; + begin = strtol(argv[3], NULL, 0); + count = strtol(argv[4], NULL, 0); + } + + if (strchr(argv[2], '%')) + fmt = argv[2]; + else { + sprintf(format, "%s%%ld", argv[2]); /* begin is a long */ + fmt = format; + } + printf("starting at %s", ctime(&start)); + for (i = 0; i < count && time(0) < end; i++, begin++) { + sprintf(filename, fmt, begin); + if (do_open) { + int fd = open(filename, O_CREAT|O_RDWR, 0644); + if (fd < 0) { + printf("open(%s) error: %s\n", filename, + strerror(errno)); + rc = errno; + break; + } + close(fd); + } else { + rc = mknod(filename, S_IFREG| 0444, 0); + if (rc) { + printf("mknod(%s) error: %s\n", + filename, strerror(errno)); + rc = errno; + break; + } + } + if ((i % 10000) == 0) { + printf(" - created %d (time %ld ; total %ld ; last %ld)\n", + i, time(0), time(0) - start, time(0) - last); + last = 
time(0); + } + } + printf("total: %d creates in %ld seconds: %f creates/second\n", i, + time(0) - start, ((float)i / (time(0) - start))); + start = time(0); + printf("finish at %s", ctime(&start)); + + return rc; +} -- 1.8.3.1