From: Peng Tao Date: Tue, 28 Aug 2012 01:53:27 +0000 (+0800) Subject: LU-1994 llite: atomic_open support X-Git-Tag: 2.3.59~81 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=784cd144103871bd421c139c09bfbf4d5d29ca08;hp=1f773208a4eee2088dc4707ebb904162978043de LU-1994 llite: atomic_open support v3.6 adds iop->atomic_open, to facilitate atomic lookup/create/open. In the meantime, struct open_intent is removed from nameidata, and ->lookup()/->create() on longer carries intent data. So Lustre has to support atomic_open in order to preserve intent functionality. Signed-off-by: Peng Tao Change-Id: Ibf4eee5a98c7a86417dc22b0075f647600328668 Reviewed-on: http://review.whamcloud.com/4387 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Yang Sheng Reviewed-by: Lai Siyao Reviewed-by: Fan Yong Reviewed-by: Oleg Drokin --- diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 109d528..4cc4bbf 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -2176,6 +2176,25 @@ EXTRA_KCFLAGS="$tmp_flags" ]) # +# 3.6 vfs adds iop->atomic_open +# +AC_DEFUN([LC_HAVE_IOP_ATOMIC_OPEN], +[AC_MSG_CHECKING([if iop has atomic_open]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + struct inode_operations iop; + iop.atomic_open = NULL; +],[ + AC_DEFINE(HAVE_IOP_ATOMIC_OPEN, 1, + [have iop atomic_open]) + AC_MSG_RESULT([yes]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# # LC_PROG_LINUX # # Lustre linux kernel checks @@ -2348,6 +2367,7 @@ AC_DEFUN([LC_PROG_LINUX], # 3.6 LC_HAVE_DENTRY_D_ALIAS_HLIST LC_DENTRY_OPEN_USE_PATH + LC_HAVE_IOP_ATOMIC_OPEN # if test x$enable_server = xyes ; then diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index ffd04f5..6ce32f7 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -642,6 +642,34 @@ out_sa: goto mark; } +#ifdef HAVE_IOP_ATOMIC_OPEN +/* + * Always trust cached dentries. Update statahead window if necessary. + */ +int ll_revalidate_nd(struct dentry *dentry, unsigned int flags) +{ + struct inode *parent = dentry->d_parent->d_inode; + int unplug = 0; + + ENTRY; + CDEBUG(D_VFSTRACE, "VFS Op:name=%s,flags=%u\n", + dentry->d_name.name, flags); + + if (!(flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE)) && + ll_need_statahead(parent, dentry) > 0) { + if (flags & LOOKUP_RCU) + RETURN(-ECHILD); + + if (dentry->d_inode == NULL) + unplug = 1; + do_statahead_enter(parent, &dentry, unplug); + ll_statahead_mark(parent, dentry); + } + + RETURN(1); +} + +#else /* !HAVE_IOP_ATOMIC_OPEN */ int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd) { int rc; @@ -709,6 +737,7 @@ out_it: RETURN(rc); } +#endif /* HAVE_IOP_ATOMIC_OPEN */ void ll_d_iput(struct dentry *de, struct inode *inode) { diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index c8f0f3b..7681b28 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -673,10 +673,10 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash, struct lustre_md *lic); int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, void *data, int flag); +#ifndef HAVE_IOP_ATOMIC_OPEN struct lookup_intent *ll_convert_intent(struct open_intent *oit, int lookup_flags); -int ll_lookup_it_finish(struct ptlrpc_request *request, - struct lookup_intent *it, void *data); +#endif struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de); /* llite/rw.c */ @@ -708,7 +708,11 @@ extern ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits, struct lustre_handle *lockh, __u64 flags); int __ll_inode_revalidate_it(struct dentry *, struct lookup_intent *, __u64 bits); +#ifdef HAVE_IOP_ATOMIC_OPEN +int ll_revalidate_nd(struct dentry *dentry, unsigned int flags); +#else int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd); +#endif int ll_file_open(struct inode *inode, struct file *file); int ll_file_release(struct inode *inode, struct file *file); int ll_glimpse_ioctl(struct ll_sb_info *sbi, @@ -1285,44 +1289,56 @@ ll_statahead_mark(struct inode *dir, struct dentry *dentry) } static inline int -ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int only_unplug) +ll_need_statahead(struct inode *dir, struct dentry *dentryp) { - struct ll_inode_info *lli; - struct ll_dentry_data *ldd; + struct ll_inode_info *lli; + struct ll_dentry_data *ldd; - if (ll_i2sbi(dir)->ll_sa_max == 0) - return -EAGAIN; + if (ll_i2sbi(dir)->ll_sa_max == 0) + return -EAGAIN; - lli = ll_i2info(dir); - /* not the same process, don't statahead */ - if (lli->lli_opendir_pid != cfs_curproc_pid()) - return -EAGAIN; + lli = ll_i2info(dir); + /* not the same process, don't statahead */ + if (lli->lli_opendir_pid != cfs_curproc_pid()) + return -EAGAIN; /* statahead has been stopped */ if (lli->lli_opendir_key == NULL) return -EAGAIN; - ldd = ll_d2d(*dentryp); - /* - * When stats a dentry, the system trigger more than once "revalidate" - * or "lookup", for "getattr", for "getxattr", and maybe for others. - * Under patchless client mode, the operation intent is not accurate, - * which maybe misguide the statahead thread. For example: - * The "revalidate" call for "getattr" and "getxattr" of a dentry maybe - * have the same operation intent -- "IT_GETATTR". - * In fact, one dentry should has only one chance to interact with the - * statahead thread, otherwise the statahead windows will be confused. - * The solution is as following: - * Assign "lld_sa_generation" with "sai_generation" when a dentry - * "IT_GETATTR" for the first time, and the subsequent "IT_GETATTR" - * will bypass interacting with statahead thread for checking: - * "lld_sa_generation == lli_sai->sai_generation" - */ - if (ldd && lli->lli_sai && - ldd->lld_sa_generation == lli->lli_sai->sai_generation) - return -EAGAIN; + ldd = ll_d2d(dentryp); + /* + * When stats a dentry, the system trigger more than once "revalidate" + * or "lookup", for "getattr", for "getxattr", and maybe for others. + * Under patchless client mode, the operation intent is not accurate, + * which maybe misguide the statahead thread. For example: + * The "revalidate" call for "getattr" and "getxattr" of a dentry maybe + * have the same operation intent -- "IT_GETATTR". + * In fact, one dentry should has only one chance to interact with the + * statahead thread, otherwise the statahead windows will be confused. + * The solution is as following: + * Assign "lld_sa_generation" with "sai_generation" when a dentry + * "IT_GETATTR" for the first time, and the subsequent "IT_GETATTR" + * will bypass interacting with statahead thread for checking: + * "lld_sa_generation == lli_sai->sai_generation" + */ + if (ldd && lli->lli_sai && + ldd->lld_sa_generation == lli->lli_sai->sai_generation) + return -EAGAIN; + + return 1; +} + +static inline int +ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int only_unplug) +{ + int ret; + + ret = ll_need_statahead(dir, *dentryp); + if (ret <= 0) + return ret; - return do_statahead_enter(dir, dentryp, only_unplug); + return do_statahead_enter(dir, dentryp, only_unplug); } /* llite ioctl register support rountine */ diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 6fa058c..48303a3 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -51,6 +51,9 @@ #include #include "llite_internal.h" +static int ll_create_it(struct inode *, struct dentry *, + int, struct lookup_intent *); + /* * Check if we have something mounted at the named dchild. * In such a case there would always be dentry present. @@ -421,6 +424,8 @@ int ll_lookup_it_finish(struct ptlrpc_request *request, /* NB 1 request reference will be taken away by ll_intent_lock() * when I return */ + CDEBUG(D_DENTRY, "it %p it_disposition %x\n", it, + it->d.lustre.it_disposition); if (!it_disposition(it, DISP_LOOKUP_NEG)) { rc = ll_prep_inode(&inode, request, (*de)->d_sb); if (rc) @@ -438,13 +443,21 @@ int ll_lookup_it_finish(struct ptlrpc_request *request, Also see bug 7198. */ } - *de = ll_splice_alias(inode, *de); + /* Only hash *de if it is unhashed (new dentry). + * Atoimc_open may passin hashed dentries for open. + */ + if (d_unhashed(*de)) + *de = ll_splice_alias(inode, *de); if (!it_disposition(it, DISP_LOOKUP_NEG)) { /* we have lookup look - unhide dentry */ if (bits & MDS_INODELOCK_LOOKUP) d_lustre_revalidate(*de); - } else { + } else if (!it_disposition(it, DISP_OPEN_CREATE)) { + /* If file created on server, don't depend on parent UPDATE + * lock to unhide it. It is left hidden and next lookup can + * find it in ll_splice_alias. + */ /* Check that parent has UPDATE lock. */ struct lookup_intent parent_it = { .it_op = IT_GETATTR, @@ -553,6 +566,121 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, return retval; } +#ifdef HAVE_IOP_ATOMIC_OPEN +static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, + unsigned int flags) +{ + struct lookup_intent *itp, it = { .it_op = IT_GETATTR }; + struct dentry *de; + + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),flags=%u\n", + dentry->d_name.len, dentry->d_name.name, parent->i_ino, + parent->i_generation, parent, flags); + + /* Optimize away (CREATE && !OPEN). Let .create handle the race. */ + if ((flags & LOOKUP_CREATE ) && !(flags & LOOKUP_OPEN)) { + ll_dops_init(dentry, 1, 1); + __d_lustre_invalidate(dentry); + d_add(dentry, NULL); + return NULL; + } + + if (flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE)) + itp = NULL; + else + itp = ⁢ + de = ll_lookup_it(parent, dentry, itp, 0); + + if (itp != NULL) + ll_intent_release(itp); + + return de; +} + +/* + * For cached negative dentry and new dentry, handle lookup/create/open + * together. + */ +static int ll_atomic_open(struct inode *dir, struct dentry *dentry, + struct file *file, unsigned open_flags, + umode_t mode, int *opened) +{ + struct lookup_intent *it; + struct dentry *de; + long long lookup_flags = LOOKUP_OPEN; + int rc = 0; + ENTRY; + + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),file %p," + "open_flags %x,mode %x opened %d\n", + dentry->d_name.len, dentry->d_name.name, dir->i_ino, + dir->i_generation, dir, file, open_flags, mode, *opened); + + OBD_ALLOC(it, sizeof(*it)); + if (!it) + RETURN(-ENOMEM); + + it->it_op = IT_OPEN; + if (mode) { + it->it_op |= IT_CREAT; + lookup_flags |= LOOKUP_CREATE; + } + it->it_create_mode = (mode & S_IALLUGO) | S_IFREG; + it->it_flags = (open_flags & ~O_ACCMODE) | OPEN_FMODE(open_flags); + + /* Dentry added to dcache tree in ll_lookup_it */ + de = ll_lookup_it(dir, dentry, it, lookup_flags); + if (IS_ERR(de)) + rc = PTR_ERR(de); + else if (de != NULL) + dentry = de; + + if (!rc) { + if (it_disposition(it, DISP_OPEN_CREATE)) { + /* Dentry instantiated in ll_create_it. */ + rc = ll_create_it(dir, dentry, mode, it); + if (rc) { + /* We dget in ll_splice_alias. */ + if (de != NULL) + dput(de); + goto out_release; + } + + *opened |= FILE_CREATED; + } + if (dentry->d_inode && it_disposition(it, DISP_OPEN_OPEN)) { + /* Open dentry. */ + if (S_ISFIFO(dentry->d_inode->i_mode)) { + /* We cannot call open here as it would + * deadlock. + */ + if (it_disposition(it, DISP_ENQ_OPEN_REF)) + ptlrpc_req_finished( + (struct ptlrpc_request *) + it->d.lustre.it_data); + rc = finish_no_open(file, de); + } else { + file->private_data = it; + rc = finish_open(file, dentry, NULL, opened); + /* We dget in ll_splice_alias. finish_open takes + * care of dget for fd open. + */ + if (de != NULL) + dput(de); + } + } else { + rc = finish_no_open(file, de); + } + } + +out_release: + ll_intent_release(it); + OBD_FREE(it, sizeof(*it)); + + RETURN(rc); +} + +#else /* !HAVE_IOP_ATOMIC_OPEN */ struct lookup_intent *ll_convert_intent(struct open_intent *oit, int lookup_flags) { @@ -647,6 +775,7 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, RETURN(de); } +#endif /* HAVE_IOP_ATOMIC_OPEN */ /* We depend on "mode" being set with the proper file type/umask by now */ static struct inode *ll_create_node(struct inode *dir, const char *name, @@ -819,6 +948,30 @@ static int ll_mknod_generic(struct inode *dir, struct qstr *name, int mode, RETURN(err); } +#ifdef HAVE_IOP_ATOMIC_OPEN +/* + * Plain create. Intent create is handled in atomic_open. + */ +static int ll_create_nd(struct inode *dir, struct dentry *dentry, + umode_t mode, bool want_excl) +{ + int rc; + + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)," + "flags=%u, excl=%d\n", + dentry->d_name.len, dentry->d_name.name, dir->i_ino, + dir->i_generation, dir, mode, want_excl); + + rc = ll_mknod_generic(dir, &dentry->d_name, mode, 0, dentry); + + ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_CREATE, 1); + + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s, unhashed %d\n", + dentry->d_name.len, dentry->d_name.name, d_unhashed(dentry)); + + return rc; +} +#else /* !HAVE_IOP_ATOMIC_OPEN */ static int ll_create_nd(struct inode *dir, struct dentry *dentry, ll_umode_t mode, struct nameidata *nd) { @@ -855,6 +1008,7 @@ out: return rc; } +#endif /* HAVE_IOP_ATOMIC_OPEN */ static int ll_symlink_generic(struct inode *dir, struct qstr *name, const char *tgt, struct dentry *dchild) @@ -1189,6 +1343,9 @@ static int ll_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode_operations ll_dir_inode_operations = { .mknod = ll_mknod, +#ifdef HAVE_IOP_ATOMIC_OPEN + .atomic_open = ll_atomic_open, +#endif .lookup = ll_lookup_nd, .create = ll_create_nd, /* We need all these non-raw things for NFSD, to not patch it. */