Whamcloud - gitweb
Merge most b_llp_hp features and fixes into b_devel:
author phil <phil>
Mon, 6 Oct 2003 22:31:50 +0000 (22:31 +0000)
committer phil <phil>
Mon, 6 Oct 2003 22:31:50 +0000 (22:31 +0000)
 - some functional LDLM reorganization to support posix flocks
 - new posix flock code added, but not yet enabled in llite/
 - kernel patches for NFS export and ext3 raw lookup
 - NFS export enabled, ext3 raw lookup not enabled
 - includes all LDLM patches from bug 1766
 - locking to fix unlink/create inode re-use recovery race
 - added /proc tunables for pre-creation variables

lustre/kernel_patches/patches/nfs_export_kernel-2.4.20.patch [new file with mode: 0644]
lustre/kernel_patches/pc/nfs_export_kernel-2.4.20.pc [new file with mode: 0644]
lustre/llite/llite_lib.c
lustre/llite/llite_nfs.c [new file with mode: 0644]
lustre/osc/osc_rpcd.c
lustre/tests/createmany-mpi.c [new file with mode: 0644]

diff --git a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.20.patch b/lustre/kernel_patches/patches/nfs_export_kernel-2.4.20.patch
new file mode 100644 (file)
index 0000000..1044a87
--- /dev/null
@@ -0,0 +1,749 @@
+ fs/Makefile        |    3 
+ fs/file_table.c    |   11 ++
+ fs/inode.c         |   23 ++++-
+ fs/namei.c         |   12 ++
+ fs/nfsd/nfsfh.c    |   65 +++++++++++++-
+ fs/nfsd/vfs.c      |  240 ++++++++++++++++++++++++++++++++++++++++++++++++-----
+ fs/super.c         |    3 
+ include/linux/fs.h |    8 +
+ kernel/ksyms.c     |    3 
+ 9 files changed, 333 insertions(+), 35 deletions(-)
+
+--- lum-2.4.20-l27/fs/Makefile~nfs_export_kernel-2.4.20        Thu Sep 25 03:31:17 2003
++++ lum-2.4.20-l27-phil/fs/Makefile    Thu Sep 25 03:30:18 2003
+@@ -7,7 +7,8 @@
+ O_TARGET := fs.o
+-export-objs :=        filesystems.o open.o dcache.o buffer.o inode.o
++export-objs :=        filesystems.o open.o dcache.o buffer.o inode.o namei.o \
++              file_table.o
+ mod-subdirs :=        nls
+ obj-y :=      open.o read_write.o devices.o file_table.o buffer.o \
+--- lum-2.4.20-l27/fs/file_table.c~nfs_export_kernel-2.4.20    Thu Nov 28 18:53:15 2002
++++ lum-2.4.20-l27-phil/fs/file_table.c        Thu Sep 25 03:25:12 2003
+@@ -82,7 +82,8 @@ struct file * get_empty_filp(void)
+  * and call the open function (if any).  The caller must verify that
+  * inode->i_fop is not NULL.
+  */
+-int init_private_file(struct file *filp, struct dentry *dentry, int mode)
++int init_private_file_it(struct file *filp, struct dentry *dentry, int mode,
++                         struct lookup_intent *it)
+ {
+       memset(filp, 0, sizeof(*filp));
+       filp->f_mode   = mode;
+@@ -90,12 +91,20 @@ int init_private_file(struct file *filp,
+       filp->f_dentry = dentry;
+       filp->f_uid    = current->fsuid;
+       filp->f_gid    = current->fsgid;
++      if (it)
++              filp->f_it = it;
+       filp->f_op     = dentry->d_inode->i_fop;
+       if (filp->f_op->open)
+               return filp->f_op->open(dentry->d_inode, filp);
+       else
+               return 0;
+ }
++EXPORT_SYMBOL(init_private_file_it);
++
++int init_private_file(struct file *filp, struct dentry *dentry, int mode)
++{
++      return init_private_file_it(filp, dentry, mode, NULL);
++}
+ void fput(struct file * file)
+ {
+--- lum-2.4.20-l27/fs/inode.c~nfs_export_kernel-2.4.20 Thu Sep 25 02:30:45 2003
++++ lum-2.4.20-l27-phil/fs/inode.c     Thu Sep 25 02:30:56 2003
+@@ -970,9 +970,10 @@ struct inode *igrab(struct inode *inode)
+ }
+-struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque)
++static inline struct inode *ifind(struct super_block *sb, unsigned long ino,
++                                  struct list_head *head,
++                                  find_inode_t find_actor, void *opaque)
+ {
+-      struct list_head * head = inode_hashtable + hash(sb,ino);
+       struct inode * inode;
+       spin_lock(&inode_lock);
+@@ -985,6 +986,24 @@ struct inode *iget4(struct super_block *
+       }
+       spin_unlock(&inode_lock);
++      return NULL;
++}
++
++struct inode *ilookup4(struct super_block *sb, unsigned long ino,
++                       find_inode_t find_actor, void *opaque)
++{
++      struct list_head * head = inode_hashtable + hash(sb,ino);
++      return ifind(sb, ino, head, find_actor, opaque);
++}
++
++struct inode *iget4(struct super_block *sb, unsigned long ino,
++                    find_inode_t find_actor, void *opaque)
++{
++      struct list_head * head = inode_hashtable + hash(sb,ino);
++      struct inode *inode = ifind(sb, ino, head, find_actor, opaque);
++      if (inode)
++              return inode;
++
+       /*
+        * get_new_inode() will do the right thing, re-trying the search
+        * in case it had to block at any point.
+--- lum-2.4.20-l27/fs/namei.c~nfs_export_kernel-2.4.20 Thu Sep 25 02:30:45 2003
++++ lum-2.4.20-l27-phil/fs/namei.c     Thu Sep 25 03:48:58 2003
+@@ -22,6 +22,7 @@
+ #include <linux/dnotify.h>
+ #include <linux/smp_lock.h>
+ #include <linux/personality.h>
++#include <linux/module.h>
+ #include <asm/namei.h>
+ #include <asm/uaccess.h>
+@@ -100,6 +101,7 @@ void intent_release(struct lookup_intent
+               it->it_op_release(it);
+ }
++EXPORT_SYMBOL(intent_release);
+ /* In order to reduce some races, while at the same time doing additional
+  * checking and hopefully speeding things up, we copy filenames to the
+@@ -900,7 +902,8 @@ struct dentry * lookup_hash(struct qstr 
+ /* SMP-safe */
+-struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
++struct dentry * lookup_one_len_it(const char * name, struct dentry * base,
++                                  int len, struct lookup_intent *it)
+ {
+       unsigned long hash;
+       struct qstr this;
+@@ -920,11 +923,16 @@ struct dentry * lookup_one_len(const cha
+       }
+       this.hash = end_name_hash(hash);
+-      return lookup_hash_it(&this, base, NULL);
++      return lookup_hash_it(&this, base, it);
+ access:
+       return ERR_PTR(-EACCES);
+ }
++struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
++{
++      return lookup_one_len_it(name, base, len, NULL);
++}
++
+ /*
+  *    namei()
+  *
+--- lum-2.4.20-l27/fs/nfsd/nfsfh.c~nfs_export_kernel-2.4.20    Thu Nov 28 18:53:15 2002
++++ lum-2.4.20-l27-phil/fs/nfsd/nfsfh.c        Thu Sep 25 02:30:56 2003
+@@ -36,6 +36,15 @@ struct nfsd_getdents_callback {
+       int sequence;           /* sequence counter */
+ };
++static struct dentry *lookup_it(struct inode *inode, struct dentry * dentry)
++{
++      if (inode->i_op->lookup_it)
++          return inode->i_op->lookup_it(inode, dentry, NULL, 0);
++      else
++          return inode->i_op->lookup(inode, dentry);
++              
++}
++
+ /*
+  * A rather strange filldir function to capture
+  * the name matching the specified inode number.
+@@ -75,6 +84,8 @@ static int nfsd_get_name(struct dentry *
+       int error;
+       struct file file;
+       struct nfsd_getdents_callback buffer;
++      struct lookup_intent it;
++      struct file *filp = NULL;
+       error = -ENOTDIR;
+       if (!dir || !S_ISDIR(dir->i_mode))
+@@ -85,9 +96,37 @@ static int nfsd_get_name(struct dentry *
+       /*
+        * Open the directory ...
+        */
+-      error = init_private_file(&file, dentry, FMODE_READ);
+-      if (error)
++      if (dentry->d_op && dentry->d_op->d_revalidate_it) {
++              if ((dentry->d_flags & DCACHE_NFSD_DISCONNECTED) &&
++                  (dentry->d_parent == dentry) ) {
++                      it.it_op_release = NULL;
++                      /*
++                       * XXX Temporary hack: simulate init_private_file() without
++                       * calling f_op->open for a disconnected dentry, since there
++                       * is no real dentry->d_name to revalidate in revalidate_it().
++                       */
++                      filp = &file;
++                      memset(filp, 0, sizeof(*filp));
++                      filp->f_mode   = FMODE_READ;
++                      atomic_set(&filp->f_count, 1);
++                      filp->f_dentry = dentry;
++                      filp->f_uid = current->fsuid;
++                      filp->f_gid = current->fsgid;
++                      filp->f_op = dentry->d_inode->i_fop;
++                      error = 0;
++              } else {
++                      intent_init(&it, IT_OPEN, 0);
++                      error = revalidate_it(dentry, &it);
++                      if (error)
++                              goto out;
++                      error = init_private_file_it(&file, dentry, FMODE_READ, &it);
++              }
++      } else {
++              error = init_private_file_it(&file, dentry, FMODE_READ, NULL);
++      }
++      if (error) 
+               goto out;
++
+       error = -EINVAL;
+       if (!file.f_op->readdir)
+               goto out_close;
+@@ -113,9 +152,13 @@ static int nfsd_get_name(struct dentry *
+       }
+ out_close:
+-      if (file.f_op->release)
++      if (file.f_op->release && !filp)
+               file.f_op->release(dir, &file);
+ out:
++      if (dentry->d_op &&
++          dentry->d_op->d_revalidate_it &&
++          it.it_op_release && !filp)
++              intent_release(&it);
+       return error;
+ }
+@@ -274,7 +317,7 @@ struct dentry *nfsd_findparent(struct de
+        * it is well connected.  But nobody returns different dentrys do they?
+        */
+       down(&child->d_inode->i_sem);
+-      pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry);
++      pdentry = lookup_it(child->d_inode, tdentry);
+       up(&child->d_inode->i_sem);
+       d_drop(tdentry); /* we never want ".." hashed */
+       if (!pdentry && tdentry->d_inode == NULL) {
+@@ -306,6 +349,8 @@ struct dentry *nfsd_findparent(struct de
+                               igrab(tdentry->d_inode);
+                               pdentry->d_flags |= DCACHE_NFSD_DISCONNECTED;
+                       }
++                      if (child->d_op && child->d_op->d_revalidate_it)
++                              pdentry->d_op = child->d_op;
+               }
+               if (pdentry == NULL)
+                       pdentry = ERR_PTR(-ENOMEM);
+@@ -463,6 +508,8 @@ find_fh_dentry(struct super_block *sb, _
+               struct dentry *pdentry;
+               struct inode *parent;
++              if (result->d_op && result->d_op->d_revalidate_it)
++                      dentry->d_op = result->d_op;
+               pdentry = nfsd_findparent(dentry);
+               err = PTR_ERR(pdentry);
+               if (IS_ERR(pdentry))
+@@ -662,6 +709,11 @@ fh_verify(struct svc_rqst *rqstp, struct
+       inode = dentry->d_inode;
++      /* cache coherency for non-device filesystems */
++      if (inode->i_op && inode->i_op->revalidate_it) {
++          inode->i_op->revalidate_it(dentry, NULL);
++      }
++
+       /* Type check. The correct error return for type mismatches
+        * does not seem to be generally agreed upon. SunOS seems to
+        * use EISDIR if file isn't S_IFREG; a comment in the NFSv3
+@@ -900,8 +952,9 @@ out_negative:
+               dentry->d_parent->d_name.name, dentry->d_name.name);
+       goto out;
+ out_uptodate:
+-      printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n",
+-              dentry->d_parent->d_name.name, dentry->d_name.name);
++      if(!dentry->d_parent->d_inode->i_op->mkdir_raw)
++              printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n",
++                      dentry->d_parent->d_name.name, dentry->d_name.name);
+       goto out;
+ }
+--- lum-2.4.20-l27/fs/nfsd/vfs.c~nfs_export_kernel-2.4.20      Thu Nov 28 18:53:15 2002
++++ lum-2.4.20-l27-phil/fs/nfsd/vfs.c  Thu Sep 25 03:05:28 2003
+@@ -77,6 +77,128 @@ struct raparms {
+ static struct raparms *               raparml;
+ static struct raparms *               raparm_cache;
++static int link_raw(struct dentry *dold, struct dentry *ddir,
++                    struct dentry *dnew)
++{
++      int err;
++
++      struct nameidata old_nd = { .dentry = dold };
++      struct nameidata nd = { .dentry = ddir, .last = dnew->d_name };
++      struct inode_operations *op = nd.dentry->d_inode->i_op;
++      err = op->link_raw(&old_nd, &nd);
++      d_instantiate(dnew, dold->d_inode);
++      if(dold->d_inode->i_op && dold->d_inode->i_op->revalidate_it)
++              dold->d_inode->i_op->revalidate_it(dnew, NULL);
++
++      return err;
++}
++
++static int unlink_raw(struct dentry *dentry, char *fname, int flen,
++                      struct dentry *rdentry)
++{
++      int err;
++        struct qstr last = { .name = fname, .len = flen };
++      struct nameidata nd = { .dentry = dentry, .last = last };
++      struct inode_operations *op = nd.dentry->d_inode->i_op;
++      err = op->unlink_raw(&nd);
++      if (!err)
++              d_delete(rdentry);
++
++      return err;
++}
++
++static int rmdir_raw(struct dentry *dentry, char *fname, int flen,
++                     struct dentry *rdentry)
++{
++      int err;
++        struct qstr last = { .name = fname, .len = flen };
++      struct nameidata nd = { .dentry = dentry, .last = last };
++      struct inode_operations *op = nd.dentry->d_inode->i_op;
++      err = op->rmdir_raw(&nd);
++      if(!err) {
++              rdentry->d_inode->i_flags |= S_DEAD;
++              d_delete(rdentry);
++      }
++
++      return err;
++}
++
++static int symlink_raw(struct dentry *dentry,  char *fname, int flen,
++                       char *path)
++{
++      int err;
++        struct qstr last = { .name = fname, .len = flen };
++      struct nameidata nd = { .dentry = dentry, .last = last };
++      struct inode_operations *op = nd.dentry->d_inode->i_op;
++      err = op->symlink_raw(&nd, path);
++
++      return err;
++}
++
++static int mkdir_raw(struct dentry *dentry, char *fname, int flen, int mode)
++{
++      int err;
++        struct qstr last = { .name = fname, .len = flen };
++      struct nameidata nd = { .dentry = dentry, .last = last };
++      struct inode_operations *op = nd.dentry->d_inode->i_op;
++      err = op->mkdir_raw(&nd, mode);
++
++      return err;
++}
++
++static int mknod_raw(struct dentry *dentry, char *fname, int flen, int mode,
++                     dev_t dev)
++{
++      int err;
++        struct qstr last = { .name = fname, .len = flen };
++      struct nameidata nd = { .dentry = dentry, .last = last };
++      struct inode_operations *op = nd.dentry->d_inode->i_op;
++      err = op->mknod_raw(&nd, mode, dev);
++
++      return err;
++}     
++
++static int rename_raw(struct dentry *fdentry, struct dentry *tdentry,
++                      struct dentry *odentry, struct dentry *ndentry)
++{
++      int err;
++
++      struct nameidata old_nd = { .dentry = fdentry, .last = odentry->d_name};
++      struct nameidata new_nd = { .dentry = tdentry, .last = ndentry->d_name};
++      struct inode_operations *op = old_nd.dentry->d_inode->i_op;
++      err = op->rename_raw(&old_nd, &new_nd);
++      d_move(odentry, ndentry);
++
++      return err;
++}
++
++static int setattr_raw(struct inode *inode, struct iattr *iap)
++{
++      int err;
++
++      iap->ia_valid |= ATTR_RAW;
++      err = inode->i_op->setattr_raw(inode, iap);
++
++      return err;
++}
++
++int revalidate_it(struct dentry *dentry, struct lookup_intent *it)
++{
++      int err = 0;
++
++      if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++              if (!dentry->d_op->d_revalidate_it(dentry, 0, it) &&
++                      !d_invalidate(dentry)) {
++                      dput(dentry);
++                      err = -EINVAL;
++                      dentry = NULL;
++                      return err;
++              }
++      }
++
++      return err;
++}
++
+ /*
+  * Look up one component of a pathname.
+  * N.B. After this call _both_ fhp and resfh need an fh_put
+@@ -300,7 +422,10 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+       }
+       err = nfserr_notsync;
+       if (!check_guard || guardtime == inode->i_ctime) {
+-              err = notify_change(dentry, iap);
++              if ( dentry->d_inode->i_op && dentry->d_inode->i_op->setattr_raw)
++                      err = setattr_raw(dentry->d_inode, iap);
++              else
++                      err = notify_change(dentry, iap);
+               err = nfserrno(err);
+       }
+       if (size_change) {
+@@ -427,6 +552,7 @@ nfsd_open(struct svc_rqst *rqstp, struct
+ {
+       struct dentry   *dentry;
+       struct inode    *inode;
++      struct lookup_intent it;
+       int             err;
+       /* If we get here, then the client has already done an "open", and (hopefully)
+@@ -473,6 +599,14 @@ nfsd_open(struct svc_rqst *rqstp, struct
+               filp->f_mode  = FMODE_READ;
+       }
++      intent_init(&it, IT_OPEN, (filp->f_flags & ~O_ACCMODE) | filp->f_mode);
++
++      err = revalidate_it(dentry, &it);
++      if (err)
++              goto out_nfserr;
++      
++      filp->f_it = &it;
++      
+       err = 0;
+       if (filp->f_op && filp->f_op->open) {
+               err = filp->f_op->open(inode, filp);
+@@ -487,7 +621,11 @@ nfsd_open(struct svc_rqst *rqstp, struct
+                       atomic_dec(&filp->f_count);
+               }
+       }
++
+ out_nfserr:
++      if (it.it_op_release)
++              intent_release(&it);
++
+       if (err)
+               err = nfserrno(err);
+ out:
+@@ -818,7 +956,7 @@ nfsd_create(struct svc_rqst *rqstp, stru
+ {
+       struct dentry   *dentry, *dchild;
+       struct inode    *dirp;
+-      int             err;
++      int             err, error = -EOPNOTSUPP;
+       err = nfserr_perm;
+       if (!flen)
+@@ -834,20 +972,44 @@ nfsd_create(struct svc_rqst *rqstp, stru
+       dentry = fhp->fh_dentry;
+       dirp = dentry->d_inode;
++      switch (type) {
++                      case S_IFDIR:
++                              if (dirp->i_op->mkdir_raw)
++                          error = mkdir_raw(dentry, fname, flen, iap->ia_mode);
++                              break;
++                      case S_IFCHR:
++                      case S_IFBLK:
++                      case S_IFIFO:
++                      case S_IFSOCK:
++                      case S_IFREG:
++                          if (dirp->i_op->mknod_raw) {
++                                      if (type == S_IFREG)
++                                              rdev = 0;
++                                      error = mknod_raw(dentry, fname, flen, iap->ia_mode, rdev);
++                              }
++                              break;
++                              default:
++                      printk("nfsd: bad file type %o in nfsd_create\n", type);
++      }
++
+       err = nfserr_notdir;
+-      if(!dirp->i_op || !dirp->i_op->lookup)
++      if(!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it))
+               goto out;
+       /*
+        * Check whether the response file handle has been verified yet.
+        * If it has, the parent directory should already be locked.
+        */
+-      if (!resfhp->fh_dentry) {
+-              /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
+-              fh_lock(fhp);
++      if (!resfhp->fh_dentry || dirp->i_op->lookup_it) {
++              /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create
++                 and nfsd_proc_create in case of lustre
++              */
++              if (!resfhp->fh_dentry)
++                      fh_lock(fhp);
+               dchild = lookup_one_len(fname, dentry, flen);
+               err = PTR_ERR(dchild);
+               if (IS_ERR(dchild))
+                       goto out_nfserr;
++              resfhp->fh_dentry = NULL;
+               err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
+               if (err)
+                       goto out;
+@@ -868,10 +1030,12 @@ nfsd_create(struct svc_rqst *rqstp, stru
+        * Make sure the child dentry is still negative ...
+        */
+       err = nfserr_exist;
+-      if (dchild->d_inode) {
+-              dprintk("nfsd_create: dentry %s/%s not negative!\n",
+-                      dentry->d_name.name, dchild->d_name.name);
+-              goto out; 
++      if ( error == -EOPNOTSUPP) {
++              if (dchild->d_inode) {
++                      dprintk("nfsd_create: dentry %s/%s not negative!\n",
++                              dentry->d_name.name, dchild->d_name.name);
++                      goto out; 
++              }
+       }
+       if (!(iap->ia_valid & ATTR_MODE))
+@@ -884,16 +1048,19 @@ nfsd_create(struct svc_rqst *rqstp, stru
+       err = nfserr_perm;
+       switch (type) {
+       case S_IFREG:
+-              err = vfs_create(dirp, dchild, iap->ia_mode);
++              if (error == -EOPNOTSUPP)
++                      err = vfs_create(dirp, dchild, iap->ia_mode);
+               break;
+       case S_IFDIR:
+-              err = vfs_mkdir(dirp, dchild, iap->ia_mode);
++              if (error == -EOPNOTSUPP)
++                      err = vfs_mkdir(dirp, dchild, iap->ia_mode);
+               break;
+       case S_IFCHR:
+       case S_IFBLK:
+       case S_IFIFO:
+       case S_IFSOCK:
+-              err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
++              if (error == -EOPNOTSUPP)       
++                      err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
+               break;
+       default:
+               printk("nfsd: bad file type %o in nfsd_create\n", type);
+@@ -962,7 +1129,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, s
+       /* Get all the sanity checks out of the way before
+        * we lock the parent. */
+       err = nfserr_notdir;
+-      if(!dirp->i_op || !dirp->i_op->lookup)
++      if (dirp->i_op->mknod_raw) {
++              err = mknod_raw(dentry, fname, flen, iap->ia_mode, 0);
++              if (err && err != -EOPNOTSUPP)
++                      goto out;
++      }
++
++      if(!dirp->i_op ||  !(dirp->i_op->lookup || dirp->i_op->lookup_it))
+               goto out;
+       fh_lock(fhp);
+@@ -1013,6 +1186,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, s
+               case NFS3_CREATE_GUARDED:
+                       err = nfserr_exist;
+               }
++              if(dirp->i_op->mknod_raw)
++                      err = 0;
+               goto out;
+       }
+@@ -1119,7 +1294,7 @@ nfsd_symlink(struct svc_rqst *rqstp, str
+                               struct iattr *iap)
+ {
+       struct dentry   *dentry, *dnew;
+-      int             err, cerr;
++      int             err, cerr, error = -EOPNOTSUPP;
+       err = nfserr_noent;
+       if (!flen || !plen)
+@@ -1133,12 +1308,18 @@ nfsd_symlink(struct svc_rqst *rqstp, str
+               goto out;
+       fh_lock(fhp);
+       dentry = fhp->fh_dentry;
++      
++      if (dentry->d_inode->i_op->symlink_raw)
++              error = symlink_raw(dentry, fname, flen, path);
++
+       dnew = lookup_one_len(fname, dentry, flen);
+       err = PTR_ERR(dnew);
+       if (IS_ERR(dnew))
+               goto out_nfserr;
+-      err = vfs_symlink(dentry->d_inode, dnew, path);
++      err = error;
++      if (err == -EOPNOTSUPP || !dentry->d_inode->i_op->symlink_raw)
++              err = vfs_symlink(dentry->d_inode, dnew, path);
+       if (!err) {
+               if (EX_ISSYNC(fhp->fh_export))
+                       nfsd_sync_dir(dentry);
+@@ -1148,7 +1329,10 @@ nfsd_symlink(struct svc_rqst *rqstp, str
+                               iap->ia_valid |= ATTR_CTIME;
+                               iap->ia_mode = (iap->ia_mode&S_IALLUGO)
+                                       | S_IFLNK;
+-                              err = notify_change(dnew, iap);
++                              if (dnew->d_inode->i_op && dnew->d_inode->i_op->setattr_raw)
++                                      err = setattr_raw(dnew->d_inode, iap);
++                              else
++                                      err = notify_change(dnew, iap);
+                               if (!err && EX_ISSYNC(fhp->fh_export))
+                                       write_inode_now(dentry->d_inode, 1);
+                      }
+@@ -1206,7 +1390,10 @@ nfsd_link(struct svc_rqst *rqstp, struct
+       dold = tfhp->fh_dentry;
+       dest = dold->d_inode;
+-      err = vfs_link(dold, dirp, dnew);
++      if (dirp->i_op->link_raw)
++              err = link_raw(dold, ddir, dnew);
++      else
++              err = vfs_link(dold, dirp, dnew);
+       if (!err) {
+               if (EX_ISSYNC(ffhp->fh_export)) {
+                       nfsd_sync_dir(ddir);
+@@ -1291,7 +1478,10 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+                       err = nfserr_perm;
+       } else
+ #endif
+-      err = vfs_rename(fdir, odentry, tdir, ndentry);
++      if(fdir->i_op->rename_raw)
++              err = rename_raw(fdentry, tdentry, odentry, ndentry);
++      else
++              err = vfs_rename(fdir, odentry, tdir, ndentry);
+       if (!err && EX_ISSYNC(tfhp->fh_export)) {
+               nfsd_sync_dir(tdentry);
+               nfsd_sync_dir(fdentry);
+@@ -1312,7 +1502,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+       fill_post_wcc(tfhp);
+       double_up(&tdir->i_sem, &fdir->i_sem);
+       ffhp->fh_locked = tfhp->fh_locked = 0;
+-      
++
+ out:
+       return err;
+ }
+@@ -1358,9 +1548,15 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+                       err = nfserr_perm;
+               } else
+ #endif
+-              err = vfs_unlink(dirp, rdentry);
++              if (dirp->i_op->unlink_raw)
++                      err = unlink_raw(dentry, fname, flen, rdentry);
++              else
++                      err = vfs_unlink(dirp, rdentry);
+       } else { /* It's RMDIR */
+-              err = vfs_rmdir(dirp, rdentry);
++              if (dirp->i_op->rmdir_raw)
++                      err = rmdir_raw(dentry, fname, flen, rdentry);
++              else
++                      err = vfs_rmdir(dirp, rdentry);
+       }
+       dput(rdentry);
+--- lum-2.4.20-l27/fs/super.c~nfs_export_kernel-2.4.20 Thu Sep 25 02:30:45 2003
++++ lum-2.4.20-l27-phil/fs/super.c     Thu Sep 25 02:30:56 2003
+@@ -27,6 +27,7 @@
+ #include <linux/devfs_fs_kernel.h>
+ #include <linux/major.h>
+ #include <linux/acct.h>
++#include <linux/fs.h>
+ #include <asm/uaccess.h>
+@@ -51,7 +52,7 @@ spinlock_t sb_lock = SPIN_LOCK_UNLOCKED;
+  */
+ static struct file_system_type *file_systems;
+-static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED;
++rwlock_t file_systems_lock = RW_LOCK_UNLOCKED;
+ /* WARNING: This can be used only if we _already_ own a reference */
+ static void get_filesystem(struct file_system_type *fs)
+--- lum-2.4.20-l27/include/linux/fs.h~nfs_export_kernel-2.4.20 Thu Sep 25 02:30:55 2003
++++ lum-2.4.20-l27-phil/include/linux/fs.h     Thu Sep 25 03:25:37 2003
+@@ -1098,6 +1098,9 @@ extern int open_namei_it(const char *fil
+                        struct nameidata *nd, struct lookup_intent *it);
+ extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
+                           int flags, struct lookup_intent *it);
++extern int revalidate_it(struct dentry *dentry, struct lookup_intent *it);
++extern int init_private_file_it(struct file *, struct dentry *dentry, int mode,
++                                struct lookup_intent *it);
+ extern int filp_close(struct file *, fl_owner_t id);
+ extern char * getname(const char *);
+@@ -1368,6 +1371,8 @@ extern void path_release(struct nameidat
+ extern int follow_down(struct vfsmount **, struct dentry **);
+ extern int follow_up(struct vfsmount **, struct dentry **);
+ extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
++extern struct dentry * lookup_one_len_it(const char *, struct dentry *, int,
++                                         struct lookup_intent *);
+ extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
+ #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
+ #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
+@@ -1381,6 +1386,8 @@ extern ino_t iunique(struct super_block 
+ typedef int (*find_inode_t)(struct inode *, unsigned long, void *);
+ extern struct inode * iget4(struct super_block *, unsigned long, find_inode_t, void *);
++extern struct inode * ilookup4(struct super_block *, unsigned long,
++                               find_inode_t, void *);
+ static inline struct inode *iget(struct super_block *sb, unsigned long ino)
+ {
+       return iget4(sb, ino, NULL, NULL);
+@@ -1496,6 +1503,7 @@ extern int dcache_dir_fsync(struct file 
+ extern int dcache_readdir(struct file *, void *, filldir_t);
+ extern struct file_operations dcache_dir_ops;
++extern rwlock_t file_systems_lock;
+ extern struct file_system_type *get_fs_type(const char *name);
+ extern struct super_block *get_super(kdev_t);
+ extern void drop_super(struct super_block *sb);
+--- lum-2.4.20-l27/kernel/ksyms.c~nfs_export_kernel-2.4.20     Thu Sep 25 02:30:49 2003
++++ lum-2.4.20-l27-phil/kernel/ksyms.c Thu Sep 25 02:30:56 2003
+@@ -146,6 +146,7 @@ EXPORT_SYMBOL(fget);
+ EXPORT_SYMBOL(igrab);
+ EXPORT_SYMBOL(iunique);
+ EXPORT_SYMBOL(iget4);
++EXPORT_SYMBOL(ilookup4);
+ EXPORT_SYMBOL(iput);
+ EXPORT_SYMBOL(force_delete);
+ EXPORT_SYMBOL(follow_up);
+@@ -156,6 +157,7 @@ EXPORT_SYMBOL(path_walk);
+ EXPORT_SYMBOL(path_release);
+ EXPORT_SYMBOL(__user_walk);
+ EXPORT_SYMBOL(lookup_one_len);
++EXPORT_SYMBOL(lookup_one_len_it);
+ EXPORT_SYMBOL(lookup_hash);
+ EXPORT_SYMBOL(sys_close);
+ EXPORT_SYMBOL(dcache_lock);
+@@ -590,3 +592,4 @@ EXPORT_SYMBOL(pidhash);
+ /* debug */
+ EXPORT_SYMBOL(dump_stack);
++EXPORT_SYMBOL(file_systems_lock);
+
+_
diff --git a/lustre/kernel_patches/pc/nfs_export_kernel-2.4.20.pc b/lustre/kernel_patches/pc/nfs_export_kernel-2.4.20.pc
new file mode 100644 (file)
index 0000000..622704f
--- /dev/null
@@ -0,0 +1,9 @@
+fs/Makefile
+fs/file_table.c
+fs/inode.c
+fs/namei.c
+fs/nfsd/nfsfh.c
+fs/nfsd/vfs.c
+fs/super.c
+include/linux/fs.h
+kernel/ksyms.c
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c
index d633069..9b0d77e 100644 (file)
@@ -135,6 +135,7 @@ int ll_fill_super(struct super_block *sb, void *data, int silent)
         struct lustre_handle mdc_conn = {0, };
         struct lustre_md md;
         class_uuid_t uuid;
+        kdev_t devno;
 
         ENTRY;
 
@@ -192,6 +193,13 @@ int ll_fill_super(struct super_block *sb, void *data, int silent)
         sb->s_blocksize_bits = log2(osfs.os_bsize);
         sb->s_magic = LL_SUPER_MAGIC;
         sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
+        
+        devno = get_uuid2int(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid, 
+                             strlen(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid));
+        write_lock(&file_systems_lock);
+        sb->s_type->fs_flags = FS_REQUIRES_DEV;
+        write_unlock(&file_systems_lock);
+        sb->s_dev = devno;
 
         obd = class_name2obd(osc);
         if (!obd) {
@@ -274,6 +282,10 @@ void ll_put_super(struct super_block *sb)
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
+        write_lock(&file_systems_lock);
+        sb->s_type->fs_flags = 0;
+        write_unlock(&file_systems_lock);
+        
         list_del(&sbi->ll_conn_chain);
         obd_disconnect(sbi->ll_osc_exp, 0);
 
@@ -312,8 +324,8 @@ struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
 {
         struct inode *inode;
         l_lock(&lock->l_resource->lr_namespace->ns_lock);
-        if (lock->l_data)
-                inode = igrab(lock->l_data);
+        if (lock->l_ast_data)
+                inode = igrab(lock->l_ast_data);
         else
                 inode = NULL;
         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
@@ -322,8 +334,8 @@ struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
 
 static int null_if_equal(struct ldlm_lock *lock, void *data)
 {
-        if (data == lock->l_data)
-                lock->l_data = NULL;
+        if (data == lock->l_ast_data)
+                lock->l_ast_data = NULL;
 
         if (lock->l_req_mode != lock->l_granted_mode)
                 return LDLM_ITER_STOP;
@@ -904,7 +916,7 @@ int ll_prep_inode(struct obd_export *exp, struct inode **inode,
         } else {
                 LASSERT(sb);
                 *inode = ll_iget(sb, md.body->ino, &md);
-                if (!*inode) {
+                if (*inode == NULL || is_bad_inode(*inode)) {
                         /* free the lsm if we allocated one above */
                         if (md.lsm != NULL)
                                 obd_free_memmd(exp, &md.lsm);
diff --git a/lustre/llite/llite_nfs.c b/lustre/llite/llite_nfs.c
new file mode 100644 (file)
index 0000000..046e959
--- /dev/null
@@ -0,0 +1,177 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *   NFS export of Lustre Light File System 
+ *
+ *   Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+#include <linux/lustre_lite.h>
+#include "llite_internal.h"
+
+/*
+ * Fold the first @len bytes of @name into a 32-bit value.
+ *
+ * Two running keys are mixed with each byte in turn; whenever the freshly
+ * mixed key has its top bit set, 0x7fffffff is subtracted from it.  The
+ * last key is shifted left by one before being returned, so bit 0 of the
+ * result is always clear.
+ */
+__u32 get_uuid2int(const char *name, int len)
+{
+        __u32 cur = 0x12a3fe2d;
+        __u32 prev = 0x37abe8f9;
+
+        for (; len != 0; len--) {
+                __u32 mixed = prev + (cur ^ (*name++ * 7152373));
+
+                if (mixed & 0x80000000)
+                        mixed -= 0x7fffffff;
+                prev = cur;
+                cur = mixed;
+        }
+
+        return cur << 1;
+}
+
+/*
+ * Return the inode numbered @ino on @sb.
+ *
+ * ilookup4() is tried first.  On a miss a fid is built from @ino,
+ * @generation and @mode and sent with mdc_getattr(); the inode is then
+ * set up from the reply by ll_prep_inode().  For regular files the request
+ * additionally sets OBD_MD_FLEASIZE and passes the length reported by
+ * obd_size_diskmd().
+ *
+ * Returns the inode on success, or ERR_PTR(rc) where rc is the value
+ * returned by the failing mdc_getattr()/ll_prep_inode() call.
+ */
+static struct inode * search_inode_for_lustre(struct super_block *sb,
+                                              unsigned long ino,
+                                              unsigned long generation,
+                                              int mode)
+{
+        struct ll_sb_info *sbi = ll_s2sbi(sb);
+        struct ptlrpc_request *request = NULL;
+        struct inode *found;
+        struct ll_fid fid;
+        unsigned long valid = 0;
+        int ea_len = 0;
+        int rc;
+
+        found = ilookup4(sb, ino, NULL, NULL);
+        if (found != NULL)
+                return found;
+
+        fid.id = (__u64)ino;
+        fid.generation = generation;
+        fid.f_type = mode;
+
+        if (S_ISREG(mode)) {
+                ea_len = obd_size_diskmd(sbi->ll_osc_exp, NULL);
+                valid |= OBD_MD_FLEASIZE;
+        }
+
+        rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ea_len, &request);
+        if (rc) {
+                CERROR("failure %d inode %lu\n", rc, ino);
+                return ERR_PTR(rc);
+        }
+
+        /* found is NULL at this point; ll_prep_inode() fills it in */
+        rc = ll_prep_inode(sbi->ll_osc_exp, &found, request, 0, sb);
+
+        /* the request is released whether or not the inode was set up */
+        ptlrpc_req_finished(request);
+
+        return rc ? ERR_PTR(rc) : found;
+}
+
+extern struct dentry_operations ll_d_ops;
+
+/*
+ * Map an (@ino, @generation, @mode) triple to a dentry on @sb.
+ *
+ * The inode is obtained through search_inode_for_lustre().  It is rejected
+ * with -ESTALE when @ino is 0, when the inode is bad, or when a non-zero
+ * @generation differs from the inode's i_generation.
+ *
+ * If the inode already has an alias that is not flagged
+ * DCACHE_NFSD_DISCONNECTED, that alias is returned with an extra reference.
+ * Otherwise a new dentry is allocated with d_alloc_root(), flagged
+ * DCACHE_NFSD_DISCONNECTED and given the ll_d_ops operations.
+ *
+ * Returns the dentry, or an ERR_PTR() (-ESTALE, -ENOMEM, or the lookup
+ * error).
+ */
+static struct dentry *ll_iget_for_nfs(struct super_block *sb, unsigned long ino,
+                                      __u32 generation, umode_t mode)
+{
+        struct list_head *pos;
+        struct dentry *dentry;
+        struct inode *inode;
+
+        if (ino == 0)
+                return ERR_PTR(-ESTALE);
+
+        inode = search_inode_for_lustre(sb, ino, generation, mode);
+        if (IS_ERR(inode))
+                return ERR_PTR(PTR_ERR(inode));
+
+        if (is_bad_inode(inode) ||
+            (generation != 0 && inode->i_generation != generation)) {
+                /* we didn't find the right inode.. */
+                CERROR(" Inode %lu, Bad count: %d %d or version  %u %u\n",
+                       inode->i_ino, inode->i_nlink,
+                       atomic_read(&inode->i_count), inode->i_generation,
+                       generation);
+                iput(inode);
+                return ERR_PTR(-ESTALE);
+        }
+
+        /* Prefer an existing alias that is not marked disconnected. */
+        spin_lock(&dcache_lock);
+        for (pos = inode->i_dentry.next; pos != &inode->i_dentry;
+             pos = pos->next) {
+                dentry = list_entry(pos, struct dentry, d_alias);
+                if (dentry->d_flags & DCACHE_NFSD_DISCONNECTED)
+                        continue;
+
+                dget_locked(dentry);
+                dentry->d_vfs_flags |= DCACHE_REFERENCED;
+                spin_unlock(&dcache_lock);
+                /* drop the inode reference taken by the lookup above */
+                iput(inode);
+                return dentry;
+        }
+        spin_unlock(&dcache_lock);
+
+        /* No usable alias: hand the inode to a fresh, disconnected dentry. */
+        dentry = d_alloc_root(inode);
+        if (dentry == NULL) {
+                iput(inode);
+                return ERR_PTR(-ENOMEM);
+        }
+        dentry->d_flags |= DCACHE_NFSD_DISCONNECTED;
+        ll_set_dd(dentry);
+        dentry->d_op = &ll_d_ops;
+        return dentry;
+}
+
+/*
+ * Decode the file handle words in @data (@len words, handle type @fhtype)
+ * into a dentry.
+ *
+ * Word layout as consumed here:
+ *   data[0..2]  ino, generation and mode of the object itself
+ *   data[3..4]  ino and mode of the parent (type 2 handles only)
+ *
+ * Type 1 handles need at least 3 words and can only yield the object
+ * (@parent == 0).  Type 2 handles need at least 5 words and yield the
+ * parent when @parent is non-zero (looked up with generation 0), otherwise
+ * the object.
+ *
+ * Returns the result of ll_iget_for_nfs(), or ERR_PTR(-EINVAL) for an
+ * unknown type, a too-short handle, or a parent request on a type 1 handle.
+ */
+struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
+                               int fhtype, int parent)
+{
+        if (fhtype == 2) {
+                if (len < 5)
+                        return ERR_PTR(-EINVAL);
+                if (parent)
+                        return ll_iget_for_nfs(sb, data[3], 0, data[4]);
+                /* no parent wanted: decode the object like a type 1 handle */
+                return ll_iget_for_nfs(sb, data[0], data[1], data[2]);
+        }
+
+        if (fhtype == 1) {
+                if (len < 3 || parent)
+                        return ERR_PTR(-EINVAL);
+                return ll_iget_for_nfs(sb, data[0], data[1], data[2]);
+        }
+
+        return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Encode @dentry into the file handle buffer @datap.
+ *
+ * On entry *@lenp is the number of __u32 words available in @datap; on
+ * return it is the number of words written.
+ *
+ * Three words (ino, generation, file-type bits of the mode) are always
+ * written.  Two more (parent ino, parent file-type bits) are appended only
+ * when the object is not a directory, the dentry has a parent, and the
+ * buffer has room for all five words.
+ *
+ * Returns the handle type: 1 for a three-word handle, 2 for a five-word
+ * handle, or 255 when fewer than three words are available.
+ *
+ * NOTE(review): @need_parent is not consulted here; the parent is encoded
+ * whenever the conditions above hold.
+ */
+int ll_dentry_to_fh(struct dentry *dentry, __u32 *datap, int *lenp,
+                    int need_parent)
+{
+        struct inode *inode = dentry->d_inode;
+
+        if (*lenp < 3)
+                return 255;
+
+        *datap++ = inode->i_ino;
+        *datap++ = inode->i_generation;
+        *datap++ = (__u32)(S_IFMT & inode->i_mode);
+
+        /*
+         * A buffer of exactly four words must not receive the two parent
+         * words, so test for "fewer than five" rather than "exactly three".
+         */
+        if (*lenp < 5 || S_ISDIR(inode->i_mode) || dentry->d_parent == NULL) {
+                *lenp = 3;
+                return 1;
+        }
+
+        *datap++ = dentry->d_parent->d_inode->i_ino;
+        *datap++ = (__u32)(S_IFMT & dentry->d_parent->d_inode->i_mode);
+
+        *lenp = 5;
+        return 2;
+}
index c9b7691..9db5db2 100644 (file)
@@ -102,11 +102,12 @@ static int osc_rpcd_check(struct osc_rpcd_ctl *orc)
                 req = list_entry(pos, struct ptlrpc_request, rq_set_chain);
                 list_del_init(&req->rq_set_chain);
                 ptlrpc_set_add_req(orc->orc_set, req);
+                rc = 1; /* need to calculate its timeout */
         }
         spin_unlock_irqrestore(&orc->orc_set->set_new_req_lock, flags);
 
         if (orc->orc_set->set_remaining) {
-                rc = ptlrpc_check_set(orc->orc_set);
+                rc = rc | ptlrpc_check_set(orc->orc_set);
 
                 /* XXX our set never completes, so we prune the completed
                  * reqs after each iteration. boy could this be smarter. */
diff --git a/lustre/tests/createmany-mpi.c b/lustre/tests/createmany-mpi.c
new file mode 100644 (file)
index 0000000..1474b7b
--- /dev/null
@@ -0,0 +1,131 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+#include "mpi.h"
+
+/* Print the three accepted command-line forms, using @prog as the name. */
+void usage(char *prog)
+{
+        printf("usage: %s {-o|-m} filenamefmt count\n"
+               "       %s {-o|-m} filenamefmt -seconds\n"
+               "       %s {-o|-m} filenamefmt start count\n",
+               prog, prog, prog);
+}
+
+/*
+ * Print process rank, loop count, message, and exit (i.e. a fatal error).
+ *
+ * @rank and @loop are printed as a prefix; @fmt and the following
+ * arguments are formatted printf-style.  MPI_Finalize() is called and the
+ * process exits with status 1, so this function never returns.
+ */
+int rprintf(int rank, int loop, const char *fmt, ...)
+{
+        va_list ap;
+
+        printf("rank %d, loop %d: ", rank, loop);
+
+        /* a va_list must go to vprintf(); printf(fmt, ap) prints garbage */
+        va_start(ap, fmt);
+        vprintf(fmt, ap);
+        va_end(ap);
+
+        MPI_Finalize();
+        exit(1);
+}
+
+/*
+ * Create a series of files and report the creation rate.
+ *
+ *   argv[1]  "-o" to create with open(O_CREAT|O_RDWR), "-m" to use mknod()
+ *   argv[2]  file name format; if it contains no '%', the file number is
+ *            appended to it.  The number is passed as a long.
+ *   argv[3]  with four arguments: a positive value is the number of files
+ *            to create, a non-positive value is a run time in seconds
+ *            (given negated).  With five arguments: the first file number.
+ *   argv[4]  number of files to create (five-argument form only)
+ *
+ * Returns 0 on success, 1 on a usage error, otherwise the errno of the
+ * failing create (or ENAMETOOLONG if a generated name does not fit).
+ */
+int main(int argc, char ** argv)
+{
+        int i, rc = 0, do_open, rank;
+        char format[4096], *fmt;
+        char filename[4096];
+        long start, last, end;
+        long begin = 0, count;
+        time_t now;
+
+        rc = MPI_Init(&argc, &argv);
+        if (rc != MPI_SUCCESS)
+                rprintf(-1, -1, "MPI_Init failed: %d\n", rc);
+
+        /* MPI is initialized from here on: finalize on every exit path */
+        if (argc < 4 || argc > 5) {
+                usage(argv[0]);
+                MPI_Finalize();
+                return 1;
+        }
+
+        if (strcmp(argv[1], "-o") == 0) {
+                do_open = 1;
+        } else if (strcmp(argv[1], "-m") == 0) {
+                do_open = 0;
+        } else {
+                usage(argv[0]);
+                MPI_Finalize();
+                return 1;
+        }
+
+        if (strlen(argv[2]) > 4080) {
+                printf("name too long\n");
+                MPI_Finalize();
+                return 1;
+        }
+
+        rc = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+        if (rc != MPI_SUCCESS)
+                rprintf(-1, -1, "MPI_Comm_rank failed: %d\n", rc);
+
+        rc = MPI_Barrier(MPI_COMM_WORLD);
+        if (rc != MPI_SUCCESS)
+                rprintf(rank, -1, "prep MPI_Barrier failed: %d\n", rc);
+
+        now = time(0);
+        start = last = now;
+
+        if (argc == 4) {
+                end = strtol(argv[3], NULL, 0);
+                if (end > 0) {
+                        /* "count" form: no time limit */
+                        count = end;
+                        end = -1UL >> 1;
+                } else {
+                        /* "-seconds" form: no count limit */
+                        end = start - end;
+                        count = -1UL >> 1;
+                }
+        } else {
+                end = -1UL >> 1;
+                begin = strtol(argv[3], NULL, 0);
+                count = strtol(argv[4], NULL, 0);
+        }
+
+        if (strchr(argv[2], '%'))
+                fmt = argv[2];
+        else {
+                /* begin is a long, so the generated conversion is %ld */
+                sprintf(format, "%s%%ld", argv[2]);
+                fmt = format;
+        }
+        printf("starting at %s", ctime(&now));
+        for (i = 0; i < count && time(0) < end; i++, begin++) {
+                int len = snprintf(filename, sizeof(filename), fmt, begin);
+
+                /* a user-supplied format may expand past the buffer */
+                if (len < 0 || (size_t)len >= sizeof(filename)) {
+                        printf("name too long\n");
+                        rc = ENAMETOOLONG;
+                        break;
+                }
+                if (do_open) {
+                        int fd = open(filename, O_CREAT|O_RDWR, 0644);
+                        if (fd < 0) {
+                                /* save errno before printf can change it */
+                                rc = errno;
+                                printf("open(%s) error: %s\n", filename,
+                                       strerror(rc));
+                                break;
+                        }
+                        close(fd);
+                } else {
+                        rc = mknod(filename, S_IFREG| 0444, 0);
+                        if (rc) {
+                                /* save errno before printf can change it */
+                                rc = errno;
+                                printf("mknod(%s) error: %s\n",
+                                       filename, strerror(rc));
+                                break;
+                        }
+                }
+                if ((i % 10000) == 0) {
+                        now = time(0);
+                        printf(" - created %d (time %ld ; total %ld ; last %ld)\n",
+                               i, (long)now, (long)now - start,
+                               (long)now - last);
+                        last = now;
+                }
+        }
+        now = time(0);
+        printf("total: %d creates in %ld seconds: %f creates/second\n", i,
+               (long)now - start, ((float)i / ((long)now - start)));
+        now = time(0);
+        printf("finish at %s", ctime(&now));
+
+        MPI_Finalize();
+        return rc;
+}