Whamcloud - gitweb
LU-1994 llite: atomic_open support
authorPeng Tao <tao.peng@emc.com>
Tue, 28 Aug 2012 01:53:27 +0000 (09:53 +0800)
committerOleg Drokin <green@whamcloud.com>
Fri, 4 Jan 2013 15:30:50 +0000 (10:30 -0500)
v3.6 adds iop->atomic_open, to facilitate atomic lookup/create/open.
In the meantime, struct open_intent is removed from nameidata, and
->lookup()/->create() on longer carries intent data. So Lustre has
to support atomic_open in order to preserve intent functionality.

Signed-off-by: Peng Tao <tao.peng@emc.com>
Change-Id: Ibf4eee5a98c7a86417dc22b0075f647600328668
Reviewed-on: http://review.whamcloud.com/4387
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Yang Sheng <yang.sheng@intel.com>
Reviewed-by: Lai Siyao <laisiyao@whamcloud.com>
Reviewed-by: Fan Yong <fan.yong@intel.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/autoconf/lustre-core.m4
lustre/llite/dcache.c
lustre/llite/llite_internal.h
lustre/llite/namei.c

index 109d528..4cc4bbf 100644 (file)
@@ -2176,6 +2176,25 @@ EXTRA_KCFLAGS="$tmp_flags"
 ])
 
 #
 ])
 
 #
+# 3.6 vfs adds iop->atomic_open
+#
+AC_DEFUN([LC_HAVE_IOP_ATOMIC_OPEN],
+[AC_MSG_CHECKING([if iop has atomic_open])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/fs.h>
+],[
+       struct inode_operations iop;
+       iop.atomic_open = NULL;
+],[
+       AC_DEFINE(HAVE_IOP_ATOMIC_OPEN, 1,
+                 [have iop atomic_open])
+       AC_MSG_RESULT([yes])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
 # LC_PROG_LINUX
 #
 # Lustre linux kernel checks
 # LC_PROG_LINUX
 #
 # Lustre linux kernel checks
@@ -2348,6 +2367,7 @@ AC_DEFUN([LC_PROG_LINUX],
         # 3.6
         LC_HAVE_DENTRY_D_ALIAS_HLIST
         LC_DENTRY_OPEN_USE_PATH
         # 3.6
         LC_HAVE_DENTRY_D_ALIAS_HLIST
         LC_DENTRY_OPEN_USE_PATH
+        LC_HAVE_IOP_ATOMIC_OPEN
 
         #
         if test x$enable_server = xyes ; then
 
         #
         if test x$enable_server = xyes ; then
index ffd04f5..6ce32f7 100644 (file)
@@ -642,6 +642,34 @@ out_sa:
         goto mark;
 }
 
         goto mark;
 }
 
+#ifdef HAVE_IOP_ATOMIC_OPEN
+/*
+ * Always trust cached dentries. Update statahead window if necessary.
+ */
+int ll_revalidate_nd(struct dentry *dentry, unsigned int flags)
+{
+       struct inode *parent = dentry->d_parent->d_inode;
+       int unplug = 0;
+
+       ENTRY;
+       CDEBUG(D_VFSTRACE, "VFS Op:name=%s,flags=%u\n",
+              dentry->d_name.name, flags);
+
+       if (!(flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE)) &&
+           ll_need_statahead(parent, dentry) > 0) {
+               if (flags & LOOKUP_RCU)
+                       RETURN(-ECHILD);
+
+               if (dentry->d_inode == NULL)
+                       unplug = 1;
+               do_statahead_enter(parent, &dentry, unplug);
+               ll_statahead_mark(parent, dentry);
+       }
+
+       RETURN(1);
+}
+
+#else /* !HAVE_IOP_ATOMIC_OPEN */
 int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
 {
         int rc;
 int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
 {
         int rc;
@@ -709,6 +737,7 @@ out_it:
 
         RETURN(rc);
 }
 
         RETURN(rc);
 }
+#endif /* HAVE_IOP_ATOMIC_OPEN */
 
 void ll_d_iput(struct dentry *de, struct inode *inode)
 {
 
 void ll_d_iput(struct dentry *de, struct inode *inode)
 {
index c8f0f3b..7681b28 100644 (file)
@@ -673,10 +673,10 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash,
                       struct lustre_md *lic);
 int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
                        void *data, int flag);
                       struct lustre_md *lic);
 int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
                        void *data, int flag);
+#ifndef HAVE_IOP_ATOMIC_OPEN
 struct lookup_intent *ll_convert_intent(struct open_intent *oit,
                                         int lookup_flags);
 struct lookup_intent *ll_convert_intent(struct open_intent *oit,
                                         int lookup_flags);
-int ll_lookup_it_finish(struct ptlrpc_request *request,
-                        struct lookup_intent *it, void *data);
+#endif
 struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
 
 /* llite/rw.c */
 struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
 
 /* llite/rw.c */
@@ -708,7 +708,11 @@ extern ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
                                    struct lustre_handle *lockh, __u64 flags);
 int __ll_inode_revalidate_it(struct dentry *, struct lookup_intent *,
                              __u64 bits);
                                    struct lustre_handle *lockh, __u64 flags);
 int __ll_inode_revalidate_it(struct dentry *, struct lookup_intent *,
                              __u64 bits);
+#ifdef HAVE_IOP_ATOMIC_OPEN
+int ll_revalidate_nd(struct dentry *dentry, unsigned int flags);
+#else
 int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd);
 int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd);
+#endif
 int ll_file_open(struct inode *inode, struct file *file);
 int ll_file_release(struct inode *inode, struct file *file);
 int ll_glimpse_ioctl(struct ll_sb_info *sbi,
 int ll_file_open(struct inode *inode, struct file *file);
 int ll_file_release(struct inode *inode, struct file *file);
 int ll_glimpse_ioctl(struct ll_sb_info *sbi,
@@ -1285,44 +1289,56 @@ ll_statahead_mark(struct inode *dir, struct dentry *dentry)
 }
 
 static inline int
 }
 
 static inline int
-ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int only_unplug)
+ll_need_statahead(struct inode *dir, struct dentry *dentryp)
 {
 {
-        struct ll_inode_info  *lli;
-        struct ll_dentry_data *ldd;
+       struct ll_inode_info  *lli;
+       struct ll_dentry_data *ldd;
 
 
-        if (ll_i2sbi(dir)->ll_sa_max == 0)
-                return -EAGAIN;
+       if (ll_i2sbi(dir)->ll_sa_max == 0)
+               return -EAGAIN;
 
 
-        lli = ll_i2info(dir);
-        /* not the same process, don't statahead */
-        if (lli->lli_opendir_pid != cfs_curproc_pid())
-                return -EAGAIN;
+       lli = ll_i2info(dir);
+       /* not the same process, don't statahead */
+       if (lli->lli_opendir_pid != cfs_curproc_pid())
+               return -EAGAIN;
 
        /* statahead has been stopped */
        if (lli->lli_opendir_key == NULL)
                return -EAGAIN;
 
 
        /* statahead has been stopped */
        if (lli->lli_opendir_key == NULL)
                return -EAGAIN;
 
-        ldd = ll_d2d(*dentryp);
-        /*
-         * When stats a dentry, the system trigger more than once "revalidate"
-         * or "lookup", for "getattr", for "getxattr", and maybe for others.
-         * Under patchless client mode, the operation intent is not accurate,
-         * which maybe misguide the statahead thread. For example:
-         * The "revalidate" call for "getattr" and "getxattr" of a dentry maybe
-         * have the same operation intent -- "IT_GETATTR".
-         * In fact, one dentry should has only one chance to interact with the
-         * statahead thread, otherwise the statahead windows will be confused.
-         * The solution is as following:
-         * Assign "lld_sa_generation" with "sai_generation" when a dentry
-         * "IT_GETATTR" for the first time, and the subsequent "IT_GETATTR"
-         * will bypass interacting with statahead thread for checking:
-         * "lld_sa_generation == lli_sai->sai_generation"
-         */
-        if (ldd && lli->lli_sai &&
-            ldd->lld_sa_generation == lli->lli_sai->sai_generation)
-                return -EAGAIN;
+       ldd = ll_d2d(dentryp);
+       /*
+        * When stats a dentry, the system trigger more than once "revalidate"
+        * or "lookup", for "getattr", for "getxattr", and maybe for others.
+        * Under patchless client mode, the operation intent is not accurate,
+        * which maybe misguide the statahead thread. For example:
+        * The "revalidate" call for "getattr" and "getxattr" of a dentry maybe
+        * have the same operation intent -- "IT_GETATTR".
+        * In fact, one dentry should has only one chance to interact with the
+        * statahead thread, otherwise the statahead windows will be confused.
+        * The solution is as following:
+        * Assign "lld_sa_generation" with "sai_generation" when a dentry
+        * "IT_GETATTR" for the first time, and the subsequent "IT_GETATTR"
+        * will bypass interacting with statahead thread for checking:
+        * "lld_sa_generation == lli_sai->sai_generation"
+        */
+       if (ldd && lli->lli_sai &&
+           ldd->lld_sa_generation == lli->lli_sai->sai_generation)
+               return -EAGAIN;
+
+       return 1;
+}
+
+static inline int
+ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int only_unplug)
+{
+       int ret;
+
+       ret = ll_need_statahead(dir, *dentryp);
+       if (ret <= 0)
+               return ret;
 
 
-        return do_statahead_enter(dir, dentryp, only_unplug);
+       return do_statahead_enter(dir, dentryp, only_unplug);
 }
 
 /* llite ioctl register support rountine */
 }
 
 /* llite ioctl register support rountine */
index 6fa058c..48303a3 100644 (file)
@@ -51,6 +51,9 @@
 #include <lustre_ver.h>
 #include "llite_internal.h"
 
 #include <lustre_ver.h>
 #include "llite_internal.h"
 
+static int ll_create_it(struct inode *, struct dentry *,
+                       int, struct lookup_intent *);
+
 /*
  * Check if we have something mounted at the named dchild.
  * In such a case there would always be dentry present.
 /*
  * Check if we have something mounted at the named dchild.
  * In such a case there would always be dentry present.
@@ -421,6 +424,8 @@ int ll_lookup_it_finish(struct ptlrpc_request *request,
 
        /* NB 1 request reference will be taken away by ll_intent_lock()
         * when I return */
 
        /* NB 1 request reference will be taken away by ll_intent_lock()
         * when I return */
+       CDEBUG(D_DENTRY, "it %p it_disposition %x\n", it,
+              it->d.lustre.it_disposition);
        if (!it_disposition(it, DISP_LOOKUP_NEG)) {
                 rc = ll_prep_inode(&inode, request, (*de)->d_sb);
                 if (rc)
        if (!it_disposition(it, DISP_LOOKUP_NEG)) {
                 rc = ll_prep_inode(&inode, request, (*de)->d_sb);
                 if (rc)
@@ -438,13 +443,21 @@ int ll_lookup_it_finish(struct ptlrpc_request *request,
                    Also see bug 7198. */
        }
 
                    Also see bug 7198. */
        }
 
-       *de = ll_splice_alias(inode, *de);
+       /* Only hash *de if it is unhashed (new dentry).
+        * Atoimc_open may passin hashed dentries for open.
+        */
+       if (d_unhashed(*de))
+               *de = ll_splice_alias(inode, *de);
 
        if (!it_disposition(it, DISP_LOOKUP_NEG)) {
                /* we have lookup look - unhide dentry */
                if (bits & MDS_INODELOCK_LOOKUP)
                        d_lustre_revalidate(*de);
 
        if (!it_disposition(it, DISP_LOOKUP_NEG)) {
                /* we have lookup look - unhide dentry */
                if (bits & MDS_INODELOCK_LOOKUP)
                        d_lustre_revalidate(*de);
-       } else {
+       } else if (!it_disposition(it, DISP_OPEN_CREATE)) {
+               /* If file created on server, don't depend on parent UPDATE
+                * lock to unhide it. It is left hidden and next lookup can
+                * find it in ll_splice_alias.
+                */
                /* Check that parent has UPDATE lock. */
                struct lookup_intent parent_it = {
                                        .it_op = IT_GETATTR,
                /* Check that parent has UPDATE lock. */
                struct lookup_intent parent_it = {
                                        .it_op = IT_GETATTR,
@@ -553,6 +566,121 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
         return retval;
 }
 
         return retval;
 }
 
+#ifdef HAVE_IOP_ATOMIC_OPEN
+static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
+                                  unsigned int flags)
+{
+       struct lookup_intent *itp, it = { .it_op = IT_GETATTR };
+       struct dentry *de;
+
+       CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),flags=%u\n",
+              dentry->d_name.len, dentry->d_name.name, parent->i_ino,
+              parent->i_generation, parent, flags);
+
+       /* Optimize away (CREATE && !OPEN). Let .create handle the race. */
+       if ((flags & LOOKUP_CREATE ) && !(flags & LOOKUP_OPEN)) {
+               ll_dops_init(dentry, 1, 1);
+               __d_lustre_invalidate(dentry);
+               d_add(dentry, NULL);
+               return NULL;
+       }
+
+       if (flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE))
+               itp = NULL;
+       else
+               itp = &it;
+       de = ll_lookup_it(parent, dentry, itp, 0);
+
+       if (itp != NULL)
+               ll_intent_release(itp);
+
+       return de;
+}
+
+/*
+ * For cached negative dentry and new dentry, handle lookup/create/open
+ * together.
+ */
+static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
+                         struct file *file, unsigned open_flags,
+                         umode_t mode, int *opened)
+{
+       struct lookup_intent *it;
+       struct dentry *de;
+       long long lookup_flags = LOOKUP_OPEN;
+       int rc = 0;
+       ENTRY;
+
+       CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),file %p,"
+                          "open_flags %x,mode %x opened %d\n",
+              dentry->d_name.len, dentry->d_name.name, dir->i_ino,
+              dir->i_generation, dir, file, open_flags, mode, *opened);
+
+       OBD_ALLOC(it, sizeof(*it));
+       if (!it)
+               RETURN(-ENOMEM);
+
+       it->it_op = IT_OPEN;
+       if (mode) {
+               it->it_op |= IT_CREAT;
+               lookup_flags |= LOOKUP_CREATE;
+       }
+       it->it_create_mode = (mode & S_IALLUGO) | S_IFREG;
+       it->it_flags = (open_flags & ~O_ACCMODE) | OPEN_FMODE(open_flags);
+
+       /* Dentry added to dcache tree in ll_lookup_it */
+       de = ll_lookup_it(dir, dentry, it, lookup_flags);
+       if (IS_ERR(de))
+               rc = PTR_ERR(de);
+       else if (de != NULL)
+               dentry = de;
+
+       if (!rc) {
+               if (it_disposition(it, DISP_OPEN_CREATE)) {
+                       /* Dentry instantiated in ll_create_it. */
+                       rc = ll_create_it(dir, dentry, mode, it);
+                       if (rc) {
+                               /* We dget in ll_splice_alias. */
+                               if (de != NULL)
+                                       dput(de);
+                               goto out_release;
+                       }
+
+                       *opened |= FILE_CREATED;
+               }
+               if (dentry->d_inode && it_disposition(it, DISP_OPEN_OPEN)) {
+                       /* Open dentry. */
+                       if (S_ISFIFO(dentry->d_inode->i_mode)) {
+                               /* We cannot call open here as it would
+                                * deadlock.
+                                */
+                               if (it_disposition(it, DISP_ENQ_OPEN_REF))
+                                       ptlrpc_req_finished(
+                                                      (struct ptlrpc_request *)
+                                                         it->d.lustre.it_data);
+                               rc = finish_no_open(file, de);
+                       } else {
+                               file->private_data = it;
+                               rc = finish_open(file, dentry, NULL, opened);
+                               /* We dget in ll_splice_alias. finish_open takes
+                                * care of dget for fd open.
+                                */
+                               if (de != NULL)
+                                       dput(de);
+                       }
+               } else {
+                       rc = finish_no_open(file, de);
+               }
+       }
+
+out_release:
+       ll_intent_release(it);
+       OBD_FREE(it, sizeof(*it));
+
+       RETURN(rc);
+}
+
+#else /* !HAVE_IOP_ATOMIC_OPEN */
 struct lookup_intent *ll_convert_intent(struct open_intent *oit,
                                         int lookup_flags)
 {
 struct lookup_intent *ll_convert_intent(struct open_intent *oit,
                                         int lookup_flags)
 {
@@ -647,6 +775,7 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
 
         RETURN(de);
 }
 
         RETURN(de);
 }
+#endif /* HAVE_IOP_ATOMIC_OPEN */
 
 /* We depend on "mode" being set with the proper file type/umask by now */
 static struct inode *ll_create_node(struct inode *dir, const char *name,
 
 /* We depend on "mode" being set with the proper file type/umask by now */
 static struct inode *ll_create_node(struct inode *dir, const char *name,
@@ -819,6 +948,30 @@ static int ll_mknod_generic(struct inode *dir, struct qstr *name, int mode,
         RETURN(err);
 }
 
         RETURN(err);
 }
 
+#ifdef HAVE_IOP_ATOMIC_OPEN
+/*
+ * Plain create. Intent create is handled in atomic_open.
+ */
+static int ll_create_nd(struct inode *dir, struct dentry *dentry,
+                       umode_t mode, bool want_excl)
+{
+       int rc;
+
+       CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),"
+                          "flags=%u, excl=%d\n",
+              dentry->d_name.len, dentry->d_name.name, dir->i_ino,
+              dir->i_generation, dir, mode, want_excl);
+
+       rc = ll_mknod_generic(dir, &dentry->d_name, mode, 0, dentry);
+
+       ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_CREATE, 1);
+
+       CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s, unhashed %d\n",
+              dentry->d_name.len, dentry->d_name.name, d_unhashed(dentry));
+
+       return rc;
+}
+#else /* !HAVE_IOP_ATOMIC_OPEN */
 static int ll_create_nd(struct inode *dir, struct dentry *dentry,
                        ll_umode_t mode, struct nameidata *nd)
 {
 static int ll_create_nd(struct inode *dir, struct dentry *dentry,
                        ll_umode_t mode, struct nameidata *nd)
 {
@@ -855,6 +1008,7 @@ out:
 
         return rc;
 }
 
         return rc;
 }
+#endif /* HAVE_IOP_ATOMIC_OPEN */
 
 static int ll_symlink_generic(struct inode *dir, struct qstr *name,
                               const char *tgt, struct dentry *dchild)
 
 static int ll_symlink_generic(struct inode *dir, struct qstr *name,
                               const char *tgt, struct dentry *dchild)
@@ -1189,6 +1343,9 @@ static int ll_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 struct inode_operations ll_dir_inode_operations = {
        .mknod              = ll_mknod,
 
 struct inode_operations ll_dir_inode_operations = {
        .mknod              = ll_mknod,
+#ifdef HAVE_IOP_ATOMIC_OPEN
+       .atomic_open        = ll_atomic_open,
+#endif
        .lookup             = ll_lookup_nd,
        .create             = ll_create_nd,
        /* We need all these non-raw things for NFSD, to not patch it. */
        .lookup             = ll_lookup_nd,
        .create             = ll_create_nd,
        /* We need all these non-raw things for NFSD, to not patch it. */