From d10c9e05ab593f361fdfd27842766a1924e63e58 Mon Sep 17 00:00:00 2001 From: yangsheng Date: Sat, 26 May 2012 02:12:36 +0800 Subject: [PATCH] LU-506 kernel: FC15 - support dcache scalability changes. * dcache supports rcu-walk, but lustre doesn't, so revalidate/check_acl/permission fall back to ref-walk. * dcache_lock is completely removed in the latest kernel; many fields are protected by inode->i_lock now. * remove super hack d_rehash_cond(), and treat DCACHE_LUSTRE_INVALID similarly to DCACHE_DISCONNECTED, therefore dentry doesn't need to be dropped and rehashed frequently. * .lookup(LOOKUP_CREATE) calls d_add() dentry directly, and .create only needs to d_instantiate() this dentry. * set sb->s_d_op to ll_d_ops, and d_alloc() will initialize dentry with it, therefore there is no need to set dentry operations explicitly on new kernels. * .d_delete is called before the dentry refcount is decreased on new kernels. * other cleanups. Change-Id: Ib08bd4a45dba8d3d5b6150e9a4df95728fdf33e2 Signed-off-by: Lai Siyao Signed-off-by: yang sheng Reviewed-on: http://review.whamcloud.com/1865 Reviewed-by: Fan Yong Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- libcfs/libcfs/linux/linux-proc.c | 2 +- lustre/autoconf/lustre-core.m4 | 62 +++++ lustre/include/linux/lustre_compat25.h | 13 -- lustre/include/linux/lustre_patchless_compat.h | 21 +- lustre/include/linux/lvfs.h | 2 +- lustre/ldlm/ldlm_lock.c | 28 ++- lustre/llite/dcache.c | 234 +++++++++---------- lustre/llite/file.c | 16 +- lustre/llite/llite_internal.h | 78 +++++-- lustre/llite/llite_lib.c | 9 +- lustre/llite/namei.c | 300 +++++++++---------------- lustre/llite/statahead.c | 31 ++- lustre/lvfs/lvfs_linux.c | 4 +- 13 files changed, 387 insertions(+), 413 deletions(-) diff --git a/libcfs/libcfs/linux/linux-proc.c b/libcfs/libcfs/linux/linux-proc.c index 7612c60..6a3f433 100644 --- a/libcfs/libcfs/linux/linux-proc.c +++ b/libcfs/libcfs/linux/linux-proc.c @@ -465,7 +465,7 @@ static 
cfs_sysctl_table_t lnet_table[] = { }, { - .ctl_name = PSDEV_CPT_TABLE, + INIT_CTL_NAME(PSDEV_CPT_TABLE) .procname = "cpu_partition_table", .maxlen = 128, .mode = 0444, diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 1deef60..dfb0c0e 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -1891,6 +1891,65 @@ LB_LINUX_TRY_COMPILE([ ]) # +# 2.6.38 dentry_operations.d_compare() takes 7 arguments. +# +AC_DEFUN([LC_D_COMPARE_7ARGS], +[AC_MSG_CHECKING([if d_compare taken 7 arguments]) +LB_LINUX_TRY_COMPILE([ + #include <linux/dcache.h> +],[ + ((struct dentry_operations*)0)->d_compare(NULL,NULL,NULL,NULL,0,NULL,NULL); +],[ + AC_DEFINE(HAVE_D_COMPARE_7ARGS, 1, + [d_compare need 7 arguments]) + AC_MSG_RESULT([yes]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# +# 2.6.38 dentry_operations.d_delete() defined 'const' for 1st parameter. +# +AC_DEFUN([LC_D_DELETE_CONST], +[AC_MSG_CHECKING([if d_delete has const declare on first parameter]) +tmp_flags="$EXTRA_KCFLAGS" +EXTRA_KCFLAGS="-Werror" +LB_LINUX_TRY_COMPILE([ + #include <linux/dcache.h> +],[ + const struct dentry *d = NULL; + ((struct dentry_operations*)0)->d_delete(d); +],[ + AC_DEFINE(HAVE_D_DELETE_CONST, const, + [d_delete first parameter declared const]) + AC_MSG_RESULT([yes]) +],[ + AC_DEFINE(HAVE_D_DELETE_CONST, , []) + AC_MSG_RESULT([no]) +]) +EXTRA_KCFLAGS="$tmp_flags" +]) + +# +# 2.6.38 dcache_lock removed. rcu-walk committed. 
+# +AC_DEFUN([LC_DCACHE_LOCK], +[AC_MSG_CHECKING([if dcache_lock is exist]) +LB_LINUX_TRY_COMPILE([ + #include <linux/dcache.h> +],[ + spin_lock(&dcache_lock); +],[ + AC_DEFINE(HAVE_DCACHE_LOCK, 1, + [dcache_lock is exist]) + AC_MSG_RESULT([yes]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# # 2.6.38 export blkdev_get_by_dev # AC_DEFUN([LC_BLKDEV_GET_BY_DEV], @@ -2152,6 +2211,9 @@ AC_DEFUN([LC_PROG_LINUX], LC_BLKDEV_GET_BY_DEV LC_GENERIC_PERMISSION LC_QUOTA_ON_USE_PATH + LC_DCACHE_LOCK + LC_D_COMPARE_7ARGS + LC_D_DELETE_CONST # 2.6.39 LC_REQUEST_QUEUE_UNPLUG_FN diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 204322f..abca410 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -157,9 +157,6 @@ do {cfs_mutex_lock_nested(&(inode)->i_mutex, I_MUTEX_PARENT); } while(0) #define set_page_private(page, v) ((page)->private = (v)) #endif -#define lock_dentry(___dentry) cfs_spin_lock(&(___dentry)->d_lock) -#define unlock_dentry(___dentry) cfs_spin_unlock(&(___dentry)->d_lock) - #define ll_kernel_locked() kernel_locked() /* @@ -258,11 +255,6 @@ static inline int cfs_cleanup_group_info(void) #include -#if !defined(HAVE_D_REHASH_COND) && defined(HAVE___D_REHASH) -#define d_rehash_cond(dentry, lock) __d_rehash(dentry, lock) -extern void __d_rehash(struct dentry *dentry, int lock); -#endif - #define CheckWriteback(page, cmd) \ ((!PageWriteback(page) && (cmd & OBD_BRW_READ)) || \ (PageWriteback(page) && (cmd & OBD_BRW_WRITE))) @@ -307,11 +299,6 @@ static inline int mapping_has_pages(struct address_space *mapping) vfs_symlink(dir, dentry, path) #endif -#define ll_set_dflags(dentry, flags) do { \ - cfs_spin_lock(&dentry->d_lock); \ - dentry->d_flags |= flags; \ - cfs_spin_unlock(&dentry->d_lock); \ - } while(0) #endif #ifndef container_of diff --git a/lustre/include/linux/lustre_patchless_compat.h b/lustre/include/linux/lustre_patchless_compat.h index 0e7e00e..d79d1c8 100644 --- 
a/lustre/include/linux/lustre_patchless_compat.h +++ b/lustre/include/linux/lustre_patchless_compat.h @@ -105,21 +105,12 @@ truncate_complete_page(struct address_space *mapping, struct page *page) } #endif /* !HAVE_TRUNCATE_COMPLETE_PAGE */ -#if !defined(HAVE_D_REHASH_COND) && !defined(HAVE___D_REHASH) -/* megahack */ -static inline void d_rehash_cond(struct dentry * entry, int lock) -{ - if (!lock) - spin_unlock(&dcache_lock); - - d_rehash(entry); - - if (!lock) - spin_lock(&dcache_lock); -} - -#define __d_rehash(dentry, lock) d_rehash_cond(dentry, lock) -#endif /* !HAVE_D_REHASH_COND && !HAVE___D_REHASH*/ +#ifdef HAVE_DCACHE_LOCK +# define dget_dlock(d) dget_locked(d) +# define d_refcount(d) atomic_read(&(d)->d_count) +#else +# define d_refcount(d) ((d)->d_count) +#endif /* HAVE_DCACHE_LOCK */ #ifdef ATTR_OPEN # define ATTR_FROM_OPEN ATTR_OPEN diff --git a/lustre/include/linux/lvfs.h b/lustre/include/linux/lvfs.h index 5e38880..4b067f9 100644 --- a/lustre/include/linux/lvfs.h +++ b/lustre/include/linux/lvfs.h @@ -111,7 +111,7 @@ static inline void l_dput(struct dentry *de) if (!de || IS_ERR(de)) return; //shrink_dcache_parent(de); - LASSERT(cfs_atomic_read(&de->d_count) > 0); + LASSERT(d_refcount(de) > 0); dput(de); } diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 34ac565..550cc1c 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -1883,8 +1883,10 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, struct ldlm_resource *res; struct ldlm_namespace *ns; int granted = 0; - int old_mode; - struct sl_insert_point prev; +#ifdef HAVE_SERVER_SUPPORT + int old_mode; + struct sl_insert_point prev; +#endif struct ldlm_interval *node; ENTRY; @@ -1907,15 +1909,19 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, res = lock->l_resource; ns = ldlm_res_to_ns(res); - old_mode = lock->l_req_mode; - lock->l_req_mode = new_mode; - if (res->lr_type == LDLM_PLAIN || res->lr_type == 
LDLM_IBITS) { - /* remember the lock position where the lock might be - * added back to the granted list later and also - * remember the join mode for skiplist fixing. */ - prev.res_link = lock->l_res_link.prev; - prev.mode_link = lock->l_sl_mode.prev; - prev.policy_link = lock->l_sl_policy.prev; +#ifdef HAVE_SERVER_SUPPORT + old_mode = lock->l_req_mode; +#endif + lock->l_req_mode = new_mode; + if (res->lr_type == LDLM_PLAIN || res->lr_type == LDLM_IBITS) { +#ifdef HAVE_SERVER_SUPPORT + /* remember the lock position where the lock might be + * added back to the granted list later and also + * remember the join mode for skiplist fixing. */ + prev.res_link = lock->l_res_link.prev; + prev.mode_link = lock->l_sl_mode.prev; + prev.policy_link = lock->l_sl_policy.prev; +#endif ldlm_resource_unlink_lock(lock); } else { ldlm_resource_unlink_lock(lock); diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 45543de..88db91b 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -48,8 +48,6 @@ #include "llite_internal.h" -cfs_spinlock_t ll_lookup_lock = CFS_SPIN_LOCK_UNLOCKED; - /* should NOT be called with the dcache lock, see fs/dcache.c */ static void ll_release(struct dentry *de) { @@ -78,30 +76,45 @@ static void ll_release(struct dentry *de) * an AST before calling d_revalidate_it(). The dentry still exists (marked * INVALID) so d_lookup() matches it, but we have no lock on it (so * lock_match() fails) and we spin around real_lookup(). 
*/ +#ifdef HAVE_D_COMPARE_7ARGS +int ll_dcompare(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) +#else int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name) +#endif { - struct dentry *dchild; - ENTRY; +#ifdef HAVE_D_COMPARE_7ARGS + ENTRY; - if (d_name->len != name->len) - RETURN(1); + if (len != name->len) + RETURN(1); - if (memcmp(d_name->name, name->name, name->len)) - RETURN(1); + if (memcmp(str, name->name, len)) + RETURN(1); +#else + struct dentry *dentry; + ENTRY; - /* XXX: d_name must be in-dentry structure */ - dchild = container_of(d_name, struct dentry, d_name); /* ugh */ + if (d_name->len != name->len) + RETURN(1); - CDEBUG(D_DENTRY,"found name %.*s(%p) - flags %d/%x - refc %d\n", - name->len, name->name, dchild, - d_mountpoint(dchild), dchild->d_flags & DCACHE_LUSTRE_INVALID, - atomic_read(&dchild->d_count)); + if (memcmp(d_name->name, name->name, name->len)) + RETURN(1); - /* mountpoint is always valid */ - if (d_mountpoint(dchild)) - RETURN(0); + /* XXX: d_name must be in-dentry structure */ + dentry = container_of(d_name, struct dentry, d_name); /* ugh */ +#endif + + CDEBUG(D_DENTRY, "found name %.*s(%p) flags %#x refc %d\n", + name->len, name->name, dentry, dentry->d_flags, + d_refcount(dentry)); + + /* mountpoint is always valid */ + if (d_mountpoint((struct dentry *)dentry)) + RETURN(0); - if (dchild->d_flags & DCACHE_LUSTRE_INVALID) + if (d_lustre_invalid(dentry)) RETURN(1); RETURN(0); @@ -147,24 +160,30 @@ static int find_cbdata(struct inode *inode) * - return 0 to cache the dentry * Should NOT be called with the dcache lock, see fs/dcache.c */ -static int ll_ddelete(struct dentry *de) +static int ll_ddelete(HAVE_D_DELETE_CONST struct dentry *de) { - ENTRY; - LASSERT(de); - - CDEBUG(D_DENTRY, "%s dentry %.*s (%p, parent %p, inode %p) %s%s\n", - (de->d_flags & DCACHE_LUSTRE_INVALID ? 
"deleting" : "keeping"), - de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode, - d_unhashed(de) ? "" : "hashed,", - list_empty(&de->d_subdirs) ? "" : "subdirs"); - - /* if not ldlm lock for this inode, set i_nlink to 0 so that - * this inode can be recycled later b=20433 */ - LASSERT(atomic_read(&de->d_count) == 0); - if (de->d_inode && !find_cbdata(de->d_inode)) - de->d_inode->i_nlink = 0; - - if (de->d_flags & DCACHE_LUSTRE_INVALID) + ENTRY; + LASSERT(de); + + CDEBUG(D_DENTRY, "%s dentry %.*s (%p, parent %p, inode %p) %s%s\n", + d_lustre_invalid((struct dentry *)de) ? "deleting" : "keeping", + de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode, + d_unhashed((struct dentry *)de) ? "" : "hashed,", + list_empty(&de->d_subdirs) ? "" : "subdirs"); + + /* kernel >= 2.6.38 last refcount is decreased after this function. */ +#ifdef DCACHE_OP_DELETE + LASSERT(d_refcount(de) == 1); +#else + LASSERT(d_refcount(de) == 0); +#endif + + /* if not ldlm lock for this inode, set i_nlink to 0 so that + * this inode can be recycled later b=20433 */ + if (de->d_inode && !find_cbdata(de->d_inode)) + de->d_inode->i_nlink = 0; + + if (d_lustre_invalid((struct dentry *)de)) RETURN(1); RETURN(0); @@ -177,19 +196,19 @@ static int ll_set_dd(struct dentry *de) CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n", de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode, - atomic_read(&de->d_count)); - - if (de->d_fsdata == NULL) { - struct ll_dentry_data *lld; - - OBD_ALLOC_PTR(lld); - if (likely(lld != NULL)) { - lock_dentry(de); - if (likely(de->d_fsdata == NULL)) - de->d_fsdata = lld; - else - OBD_FREE_PTR(lld); - unlock_dentry(de); + d_refcount(de)); + + if (de->d_fsdata == NULL) { + struct ll_dentry_data *lld; + + OBD_ALLOC_PTR(lld); + if (likely(lld != NULL)) { + spin_lock(&de->d_lock); + if (likely(de->d_fsdata == NULL)) + de->d_fsdata = lld; + else + OBD_FREE_PTR(lld); + spin_unlock(&de->d_lock); } else { RETURN(-ENOMEM); } @@ -214,7 
+233,11 @@ int ll_dops_init(struct dentry *de, int block, int init_sa) if (lld != NULL && init_sa != 0) lld->lld_sa_generation = 0; - de->d_op = &ll_d_ops; +#ifdef DCACHE_OP_HASH + LASSERT(de->d_op == &ll_d_ops); +#else + de->d_op = &ll_d_ops; +#endif return rc; } @@ -254,71 +277,18 @@ void ll_intent_release(struct lookup_intent *it) EXIT; } -/* Drop dentry if it is not used already, unhash otherwise. - Should be called with dcache lock held! - Returns: 1 if dentry was dropped, 0 if unhashed. */ -int ll_drop_dentry(struct dentry *dentry) -{ - lock_dentry(dentry); - if (atomic_read(&dentry->d_count) == 0) { - CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p " - "inode %p\n", dentry->d_name.len, - dentry->d_name.name, dentry, dentry->d_parent, - dentry->d_inode); - dget_locked(dentry); - __d_drop(dentry); - unlock_dentry(dentry); - spin_unlock(&dcache_lock); - cfs_spin_unlock(&ll_lookup_lock); - dput(dentry); - cfs_spin_lock(&ll_lookup_lock); - spin_lock(&dcache_lock); - return 1; - } - /* disconected dentry can not be find without lookup, because we - * not need his to unhash or mark invalid. */ - if (dentry->d_flags & DCACHE_DISCONNECTED) { - unlock_dentry(dentry); - RETURN (0); - } - - if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) { - CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p " - "inode %p refc %d\n", dentry->d_name.len, - dentry->d_name.name, dentry, dentry->d_parent, - dentry->d_inode, atomic_read(&dentry->d_count)); - /* actually we don't unhash the dentry, rather just - * mark it inaccessible for to __d_lookup(). 
otherwise - * sys_getcwd() could return -ENOENT -bzzz */ - dentry->d_flags |= DCACHE_LUSTRE_INVALID; - if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode)) - __d_drop(dentry); - } - unlock_dentry(dentry); - return 0; -} - -void ll_unhash_aliases(struct inode *inode) +void ll_invalidate_aliases(struct inode *inode) { - struct list_head *tmp, *head; - ENTRY; - - if (inode == NULL) { - CERROR("unexpected NULL inode, tell phil\n"); - return; - } + struct dentry *dentry; + ENTRY; - CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n", - inode->i_ino, inode->i_generation, inode); + LASSERT(inode != NULL); - head = &inode->i_dentry; - cfs_spin_lock(&ll_lookup_lock); - spin_lock(&dcache_lock); -restart: - tmp = head; - while ((tmp = tmp->next) != head) { - struct dentry *dentry = list_entry(tmp, struct dentry, d_alias); + CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n", + inode->i_ino, inode->i_generation, inode); + ll_lock_dcache(inode); + cfs_list_for_each_entry(dentry, &inode->i_dentry, d_alias) { CDEBUG(D_DENTRY, "dentry in drop %.*s (%p) parent %p " "inode %p flags %d\n", dentry->d_name.len, dentry->d_name.name, dentry, dentry->d_parent, @@ -331,11 +301,9 @@ restart: libcfs_debug_dumpstack(NULL); } - if (ll_drop_dentry(dentry)) - goto restart; - } - spin_unlock(&dcache_lock); - cfs_spin_unlock(&ll_lookup_lock); + d_lustre_invalidate(dentry); + } + ll_unlock_dcache(inode); EXIT; } @@ -415,7 +383,7 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, if (it && (it->it_op & IT_CREAT)) RETURN(0); - if (de->d_flags & DCACHE_LUSTRE_INVALID) + if (d_lustre_invalid(de)) RETURN(0); ibits = MDS_INODELOCK_UPDATE; @@ -442,7 +410,7 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, ll_frob_intent(&it, &lookup_it); LASSERT(it); - if (it->it_op == IT_LOOKUP && !(de->d_flags & DCACHE_LUSTRE_INVALID)) + if (it->it_op == IT_LOOKUP && !d_lustre_invalid(de)) RETURN(1); if ((it->it_op == IT_OPEN) && de->d_inode) { @@ -550,10 +518,6 @@ 
revalidate_finish: } rc = 1; - /* unfortunately ll_intent_lock may cause a callback and revoke our - * dentry */ - ll_dentry_rehash(de, 0); - out: /* We do not free request as it may be reused during following lookup * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will @@ -562,18 +526,17 @@ out: if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE)) ptlrpc_req_finished(req); if (rc == 0) { - ll_unhash_aliases(de->d_inode); - /* done in ll_unhash_aliases() - dentry->d_flags |= DCACHE_LUSTRE_INVALID; */ - } else { - __u64 bits = 0; - - CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p " - "inode %p refc %d\n", de->d_name.len, - de->d_name.name, de, de->d_parent, de->d_inode, - atomic_read(&de->d_count)); - ll_set_lock_data(exp, de->d_inode, it, &bits); - ll_dentry_reset_flags(de, bits); + ll_invalidate_aliases(de->d_inode); + } else { + __u64 bits = 0; + + CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p " + "inode %p refc %d\n", de->d_name.len, + de->d_name.name, de, de->d_parent, de->d_inode, + d_refcount(de)); + ll_set_lock_data(exp, de->d_inode, it, &bits); + if ((bits & MDS_INODELOCK_LOOKUP) && d_lustre_invalid(de)) + d_lustre_revalidate(de); ll_lookup_finish_locks(it, de); } @@ -650,6 +613,11 @@ int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd) int rc; ENTRY; +#ifdef LOOKUP_RCU + if (nd->flags & LOOKUP_RCU) + return -ECHILD; +#endif + if (nd && !(nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))) { struct lookup_intent *it; diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 5498217..109d3b9 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -2356,13 +2356,8 @@ int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it, do_lookup() -> ll_revalidate_it(). We cannot use d_drop here to preserve get_cwd functionality on 2.6. 
Bug 10503 */ - if (!dentry->d_inode->i_nlink) { - cfs_spin_lock(&ll_lookup_lock); - spin_lock(&dcache_lock); - ll_drop_dentry(dentry); - spin_unlock(&dcache_lock); - cfs_spin_unlock(&ll_lookup_lock); - } + if (!dentry->d_inode->i_nlink) + d_lustre_invalidate(dentry); ll_lookup_finish_locks(&oit, dentry); } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) { @@ -2523,7 +2518,7 @@ lustre_check_acl(struct inode *inode, int mask) int rc; ENTRY; -#ifdef HAVE_GENERIC_PERMISSION_4ARGS +#ifdef IPERM_FLAG_RCU if (flags & IPERM_FLAG_RCU) return -ECHILD; #endif @@ -2556,6 +2551,11 @@ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd) int rc = 0; ENTRY; +#ifdef IPERM_FLAG_RCU + if (flags & IPERM_FLAG_RCU) + return -ECHILD; +#endif + /* as root inode are NOT getting validated in lookup operation, * need to do it before permission check. */ diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index b03ce4b..2ecdf02 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -673,7 +673,7 @@ struct lookup_intent *ll_convert_intent(struct open_intent *oit, int lookup_flags); int ll_lookup_it_finish(struct ptlrpc_request *request, struct lookup_intent *it, void *data); -struct dentry *ll_find_alias(struct inode *inode, struct dentry *de); +struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de); /* llite/rw.c */ int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to); @@ -768,16 +768,21 @@ int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg); int ll_fid2path(struct obd_export *exp, void *arg); /* llite/dcache.c */ + int ll_dops_init(struct dentry *de, int block, int init_sa); -extern cfs_spinlock_t ll_lookup_lock; extern struct dentry_operations ll_d_ops; void ll_intent_drop_lock(struct lookup_intent *); void ll_intent_release(struct lookup_intent *); -int ll_drop_dentry(struct dentry *dentry); -void ll_unhash_aliases(struct inode *); 
+void ll_invalidate_aliases(struct inode *); void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft); void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry); +#ifdef HAVE_D_COMPARE_7ARGS +int ll_dcompare(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *d_name); +#else int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name); +#endif int ll_revalidate_it_finish(struct ptlrpc_request *request, struct lookup_intent *it, struct dentry *de); @@ -1459,28 +1464,57 @@ static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode, *bits = it->d.lustre.it_lock_bits; } -static inline void ll_dentry_rehash(struct dentry *dentry, int locked) +static inline void ll_lock_dcache(struct inode *inode) { - if (!locked) { - cfs_spin_lock(&ll_lookup_lock); - spin_lock(&dcache_lock); - } - if (d_unhashed(dentry)) - d_rehash_cond(dentry, 0); - if (!locked) { - spin_unlock(&dcache_lock); - cfs_spin_unlock(&ll_lookup_lock); - } +#ifdef HAVE_DCACHE_LOCK + spin_lock(&dcache_lock); +#else + spin_lock(&inode->i_lock); +#endif } -static inline void ll_dentry_reset_flags(struct dentry *dentry, __u64 bits) +static inline void ll_unlock_dcache(struct inode *inode) { - if (bits & MDS_INODELOCK_LOOKUP && - dentry->d_flags & DCACHE_LUSTRE_INVALID) { - lock_dentry(dentry); - dentry->d_flags &= ~DCACHE_LUSTRE_INVALID; - unlock_dentry(dentry); - } +#ifdef HAVE_DCACHE_LOCK + spin_unlock(&dcache_lock); +#else + spin_unlock(&inode->i_lock); +#endif +} + +static inline int d_lustre_invalid(const struct dentry *dentry) +{ + return dentry->d_flags & DCACHE_LUSTRE_INVALID; +} + +static inline void __d_lustre_invalidate(struct dentry *dentry) +{ + dentry->d_flags |= DCACHE_LUSTRE_INVALID; +} + +/* + * Mark dentry INVALID, if dentry refcount is zero (this is normally case for + * ll_md_blocking_ast), unhash this 
dentry, and let dcache to reclaim it later; + * else dput() of the last refcount will unhash this dentry and kill it. + */ +static inline void d_lustre_invalidate(struct dentry *dentry) +{ + CDEBUG(D_DENTRY, "invalidate dentry %.*s (%p) parent %p inode %p " + "refc %d\n", dentry->d_name.len, dentry->d_name.name, dentry, + dentry->d_parent, dentry->d_inode, d_refcount(dentry)); + + spin_lock(&dentry->d_lock); + __d_lustre_invalidate(dentry); + if (d_refcount(dentry) == 0) + __d_drop(dentry); + spin_unlock(&dentry->d_lock); +} + +static inline void d_lustre_revalidate(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_LUSTRE_INVALID; + spin_unlock(&dentry->d_lock); } #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2,7,50,0) diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 958cf24..78ba6bb 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -535,7 +535,12 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, cl_sb_init(sb); sb->s_root = d_alloc_root(root); - sb->s_root->d_op = &ll_d_root_ops; +#ifdef DCACHE_OP_HASH + d_set_d_op(sb->s_root, &ll_d_root_ops); + sb->s_d_op = &ll_d_ops; +#else + sb->s_root->d_op = &ll_d_root_ops; +#endif sbi->ll_sdev_orig = sb->s_dev; @@ -620,7 +625,7 @@ void lustre_dump_dentry(struct dentry *dentry, int recur) " flags=0x%x, fsdata=%p, %d subdirs\n", dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_parent->d_name.len, dentry->d_parent->d_name.name, - dentry->d_parent, dentry->d_inode, atomic_read(&dentry->d_count), + dentry->d_parent, dentry->d_inode, d_refcount(dentry), dentry->d_flags, dentry->d_fsdata, subdirs); if (dentry->d_inode != NULL) ll_dump_inode(dentry->d_inode); diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 4d94aa7..331db4e 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -166,35 +166,28 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash, RETURN(inode); } -static void 
ll_drop_negative_dentry(struct inode *dir) +static void ll_invalidate_negative_children(struct inode *dir) { - struct dentry *dentry, *tmp_alias, *tmp_subdir; - - cfs_spin_lock(&ll_lookup_lock); - spin_lock(&dcache_lock); -restart: - list_for_each_entry_safe(dentry, tmp_alias, - &dir->i_dentry,d_alias) { - if (!list_empty(&dentry->d_subdirs)) { - struct dentry *child; - list_for_each_entry_safe(child, tmp_subdir, - &dentry->d_subdirs, - d_child) { - /* XXX Print some debug here? */ - if (!child->d_inode) - /* Negative dentry. If we were - dropping dcache lock, go - throught the list again */ - if (ll_drop_dentry(child)) - goto restart; - } - } - } - spin_unlock(&dcache_lock); - cfs_spin_unlock(&ll_lookup_lock); + struct dentry *dentry, *tmp_subdir; + + ll_lock_dcache(dir); + list_for_each_entry(dentry, &dir->i_dentry, d_alias) { + spin_lock(&dentry->d_lock); + if (!list_empty(&dentry->d_subdirs)) { + struct dentry *child; + + list_for_each_entry_safe(child, tmp_subdir, + &dentry->d_subdirs, + d_child) { + if (child->d_inode == NULL) + d_lustre_invalidate(child); + } + } + spin_unlock(&dentry->d_lock); + } + ll_unlock_dcache(dir); } - int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag) { @@ -267,13 +260,13 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, CDEBUG(D_INODE, "invalidating inode %lu\n", inode->i_ino); truncate_inode_pages(inode->i_mapping, 0); - ll_drop_negative_dentry(inode); - } + ll_invalidate_negative_children(inode); + } - if (inode->i_sb->s_root && - inode != inode->i_sb->s_root->d_inode && - (bits & MDS_INODELOCK_LOOKUP)) - ll_unhash_aliases(inode); + if (inode->i_sb->s_root && + inode != inode->i_sb->s_root->d_inode && + (bits & MDS_INODELOCK_LOOKUP)) + ll_invalidate_aliases(inode); iput(inode); break; } @@ -329,110 +322,78 @@ void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2) #endif } -static void ll_d_add(struct dentry *de, struct inode *inode) +/* + * 
try to reuse three types of dentry: + * 1. unhashed alias, this one is unhashed by d_invalidate (but it may be valid + * by concurrent .revalidate). + * 2. INVALID alias (common case for no valid ldlm lock held, but this flag may + * be cleared by others calling d_lustre_revalidate). + * 3. DISCONNECTED alias. + */ +static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry) { - CDEBUG(D_DENTRY, "adding inode %p to dentry %p\n", inode, de); - /* d_instantiate */ - if (!list_empty(&de->d_alias)) { - spin_unlock(&dcache_lock); - CERROR("dentry %.*s %p alias next %p, prev %p\n", - de->d_name.len, de->d_name.name, de, - de->d_alias.next, de->d_alias.prev); - LBUG(); - } - if (inode) - list_add(&de->d_alias, &inode->i_dentry); - de->d_inode = inode; - - /* d_rehash */ - if (!d_unhashed(de)) { - spin_unlock(&dcache_lock); - CERROR("dentry %.*s %p hash next %p\n", - de->d_name.len, de->d_name.name, de, de->d_hash.next); - LBUG(); - } + struct dentry *alias, *discon_alias, *invalid_alias; + + if (list_empty(&inode->i_dentry)) + return NULL; + + discon_alias = invalid_alias = NULL; + + ll_lock_dcache(inode); + list_for_each_entry(alias, &inode->i_dentry, d_alias) { + LASSERT(alias != dentry); + + spin_lock(&alias->d_lock); + if (alias->d_flags & DCACHE_DISCONNECTED) + /* LASSERT(last_discon == NULL); LU-405, bz 20055 */ + discon_alias = alias; + else if (alias->d_parent == dentry->d_parent && + alias->d_name.hash == dentry->d_name.hash && + alias->d_name.len == dentry->d_name.len && + memcmp(alias->d_name.name, dentry->d_name.name, + dentry->d_name.len) == 0) + invalid_alias = alias; + spin_unlock(&alias->d_lock); + + if (invalid_alias) + break; + } + alias = invalid_alias ?: discon_alias ?: NULL; + if (alias) { + spin_lock(&alias->d_lock); + dget_dlock(alias); + spin_unlock(&alias->d_lock); + } + ll_unlock_dcache(inode); + + return alias; } -/* Search "inode"'s alias list for a dentry that has the same name and parent - * as de. If found, return it. 
If not found, return de. - * Lustre can't use d_add_unique because don't unhash aliases for directory - * in ll_revalidate_it. After revaliadate inode will be have hashed aliases - * and it triggers BUG_ON in d_instantiate_unique (bug #10954). +/* + * Similar to d_splice_alias(), but lustre treats DCACHE_LUSTRE_INVALID alias + * similar to DCACHE_DISCONNECTED, and tries to use it anyway. */ -struct dentry *ll_find_alias(struct inode *inode, struct dentry *de) +struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de) { - struct list_head *tmp; - struct dentry *dentry; - struct dentry *last_discon = NULL; - - cfs_spin_lock(&ll_lookup_lock); - spin_lock(&dcache_lock); - list_for_each(tmp, &inode->i_dentry) { - dentry = list_entry(tmp, struct dentry, d_alias); - - /* We are called here with 'de' already on the aliases list. */ - if (unlikely(dentry == de)) { - CERROR("whoops\n"); - continue; - } - - if (dentry->d_flags & DCACHE_DISCONNECTED) { - /* LASSERT(last_discon == NULL); LU-405, bz 20055 */ - last_discon = dentry; - continue; - } - - if (dentry->d_parent != de->d_parent) - continue; - - if (dentry->d_name.hash != de->d_name.hash) - continue; - - if (dentry->d_name.len != de->d_name.len) - continue; - - if (memcmp(dentry->d_name.name, de->d_name.name, - de->d_name.len) != 0) - continue; - - dget_locked(dentry); - ll_dops_init(dentry, 0, 1); - ll_dentry_rehash(dentry, 1); - spin_unlock(&dcache_lock); - cfs_spin_unlock(&ll_lookup_lock); - iput(inode); - CDEBUG(D_DENTRY, "alias dentry %.*s (%p) parent %p inode %p " - "refc %d\n", de->d_name.len, de->d_name.name, de, - de->d_parent, de->d_inode, atomic_read(&de->d_count)); - return dentry; - } - - if (last_discon) { - CDEBUG(D_DENTRY, "Reuse disconnected dentry %p inode %p " - "refc %d\n", last_discon, last_discon->d_inode, - atomic_read(&last_discon->d_count)); - dget_locked(last_discon); - lock_dentry(last_discon); - last_discon->d_flags |= DCACHE_LUSTRE_INVALID; - unlock_dentry(last_discon); - 
spin_unlock(&dcache_lock); - cfs_spin_unlock(&ll_lookup_lock); - ll_dops_init(last_discon, 1, 1); - d_rehash(de); - d_move(last_discon, de); - iput(inode); - return last_discon; - } - lock_dentry(de); - de->d_flags |= DCACHE_LUSTRE_INVALID; - unlock_dentry(de); - ll_d_add(de, inode); - spin_unlock(&dcache_lock); - cfs_spin_unlock(&ll_lookup_lock); - - security_d_instantiate(de, inode); - d_rehash(de); - + struct dentry *new; + + if (inode) { + new = ll_find_alias(inode, de); + if (new) { + ll_dops_init(new, 1, 1); + d_move(new, de); + iput(inode); + CDEBUG(D_DENTRY, + "Reuse dentry %p inode %p refc %d flags %#x\n", + new, new->d_inode, d_refcount(new), new->d_flags); + return new; + } + } + __d_lustre_invalidate(de); + ll_dops_init(de, 1, 1); + d_add(de, inode); + CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n", + de, de->d_inode, d_refcount(de), de->d_flags); return de; } @@ -443,15 +404,13 @@ int ll_lookup_it_finish(struct ptlrpc_request *request, struct dentry **de = icbd->icbd_childp; struct inode *parent = icbd->icbd_parent; struct inode *inode = NULL; - int rc; - ENTRY; - - /* NB 1 request reference will be taken away by ll_intent_lock() - * when I return */ - if (!it_disposition(it, DISP_LOOKUP_NEG)) { - struct dentry *save = *de; - __u64 bits = 0; + __u64 bits = 0; + int rc; + ENTRY; + /* NB 1 request reference will be taken away by ll_intent_lock() + * when I return */ + if (!it_disposition(it, DISP_LOOKUP_NEG)) { rc = ll_prep_inode(&inode, request, (*de)->d_sb); if (rc) RETURN(rc); @@ -466,52 +425,25 @@ int ll_lookup_it_finish(struct ptlrpc_request *request, Everybody else who needs correct file size would call ll_glimpse_size or some equivalent themselves anyway. Also see bug 7198. 
*/ - ll_dops_init(*de, 1, 1); - *de = ll_find_alias(inode, *de); - if (*de != save) { - struct ll_dentry_data *lld = ll_d2d(*de); - - /* just make sure the ll_dentry_data is ready */ - if (unlikely(lld == NULL)) - ll_dops_init(*de, 1, 1); - } - /* we have lookup look - unhide dentry */ - ll_dentry_reset_flags(*de, bits); - } else { + } + + *de = ll_splice_alias(inode, *de); + + if (!it_disposition(it, DISP_LOOKUP_NEG)) { + /* we have lookup look - unhide dentry */ + if (bits & MDS_INODELOCK_LOOKUP) + d_lustre_revalidate(*de); + } else { + /* Check that parent has UPDATE lock. */ struct lookup_intent parent_it = { .it_op = IT_GETATTR, .d.lustre.it_lock_handle = 0 }; - ll_dops_init(*de, 1, 1); - /* Check that parent has UPDATE lock. If there is none, we - cannot afford to hash this dentry (done by ll_d_add) as it - might get picked up later when UPDATE lock will appear; - otherwise, add ref to the parent UPDATE lock, to make sure - ll_d_add() undergoes with parent's UPDATE lock held */ if (md_revalidate_lock(ll_i2mdexp(parent), &parent_it, &ll_i2info(parent)->lli_fid, NULL)) { - spin_lock(&dcache_lock); - ll_d_add(*de, NULL); - spin_unlock(&dcache_lock); - security_d_instantiate(*de, inode); - d_rehash(*de); + d_lustre_revalidate(*de); ll_intent_release(&parent_it); - } else { - /* negative lookup - and don't have update lock to - * parent */ - lock_dentry(*de); - (*de)->d_flags |= DCACHE_LUSTRE_INVALID; - unlock_dentry(*de); - - (*de)->d_inode = NULL; - /* We do not want to hash the dentry if don`t have a - * lock, but if this dentry is later used in d_move, - * we'd hit uninitialised list head d_hash, so we just - * do this to init d_hash field but leave dentry - * unhashed. (bug 10796). 
*/ - d_rehash(*de); - d_drop(*de); - } + } } RETURN(0); @@ -654,6 +586,8 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, /* We are sure this is new dentry, so we need to create our private data and set the dentry ops */ ll_dops_init(dentry, 1, 1); + __d_lustre_invalidate(dentry); + d_add(dentry, NULL); RETURN(NULL); } it = ll_convert_intent(&nd->intent.open, nd->flags); @@ -778,13 +712,6 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, RETURN(PTR_ERR(inode)); d_instantiate(dentry, inode); - /* Negative dentry may be unhashed if parent does not have UPDATE lock, - * but some callers, e.g. do_coredump, expect dentry to be hashed after - * successful create. Hash it here. */ - spin_lock(&dcache_lock); - if (d_unhashed(dentry)) - d_rehash_cond(dentry, 0); - spin_unlock(&dcache_lock); RETURN(0); } @@ -840,7 +767,6 @@ static int ll_new_node(struct inode *dir, struct qstr *name, if (err) GOTO(err_exit, err); - d_drop(dchild); d_instantiate(dchild, inode); } EXIT; @@ -965,8 +891,6 @@ static int ll_link_generic(struct inode *src, struct inode *dir, ll_finish_md_op_data(op_data); if (err) GOTO(out, err); - if (dchild) - d_drop(dchild); ll_update_times(request, dir); ll_stats_ops_tally(sbi, LPROC_LL_LINK, 1); diff --git a/lustre/llite/statahead.c b/lustre/llite/statahead.c index 6b0a59d..d911ab6 100644 --- a/lustre/llite/statahead.c +++ b/lustre/llite/statahead.c @@ -1645,18 +1645,14 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, struct lookup_intent it = { .it_op = IT_GETATTR, .d.lustre.it_lock_handle = entry->se_handle }; - struct ll_dentry_data *lld; - __u64 bits; - - rc = md_revalidate_lock(ll_i2mdexp(dir), &it, - ll_inode2fid(inode), &bits); - if (rc == 1) { - if ((*dentryp)->d_inode == NULL) { - *dentryp = ll_find_alias(inode, - *dentryp); - lld = ll_d2d(*dentryp); - if (unlikely(lld == NULL)) - ll_dops_init(*dentryp, 1, 1); + __u64 bits; + + rc = md_revalidate_lock(ll_i2mdexp(dir), 
&it, + ll_inode2fid(inode), &bits); + if (rc == 1) { + if ((*dentryp)->d_inode == NULL) { + *dentryp = ll_splice_alias(inode, + *dentryp); } else if ((*dentryp)->d_inode != inode) { /* revalidate, but inode is recreated */ CDEBUG(D_READA, @@ -1671,12 +1667,13 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp, ll_sai_unplug(sai, entry); RETURN(-ESTALE); } else { - ll_dentry_rehash(*dentryp, 0); - iput(inode); - } - entry->se_inode = NULL; + iput(inode); + } + entry->se_inode = NULL; - ll_dentry_reset_flags(*dentryp, bits); + if ((bits & MDS_INODELOCK_LOOKUP) && + d_lustre_invalid(*dentryp)) + d_lustre_revalidate(*dentryp); ll_intent_release(&it); } } diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index 96970539..6573f35 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -122,8 +122,8 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, OBD_SET_CTXT_MAGIC(save); save->fs = get_fs(); - LASSERT(cfs_atomic_read(&cfs_fs_pwd(current->fs)->d_count)); - LASSERT(cfs_atomic_read(&new_ctx->pwd->d_count)); + LASSERT(d_refcount(cfs_fs_pwd(current->fs))); + LASSERT(d_refcount(new_ctx->pwd)); save->pwd = dget(cfs_fs_pwd(current->fs)); save->pwdmnt = mntget(cfs_fs_mnt(current->fs)); save->luc.luc_umask = cfs_curproc_umask(); -- 1.8.3.1