From: Artem Blagodarenko Date: Wed, 2 May 2012 14:05:33 +0000 (+0400) Subject: LU-903 ldlm: inode references moved to resource X-Git-Tag: 2.3.63~97 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=59c3911deb8357c4b6aad68d1b1fc81a8ad61b3f LU-903 ldlm: inode references moved to resource There is a race condition while get_attr after cancel_lru_locks and sysctl drop_caches. ll_clear_inode clears l_ast_data for ldlm lock and this lock can't be canceled because "inode == NULL". ll_mdc_blocking_ast finds such lock. As result DCACHE_LUSTRE_INVALID is not set and lookup returns wrong inode. This patch moves inode structure reference to "ldlm_lock::l_resource::lr_lvb_inode". This prevents from different inode references for same resource's lock. Xyratex-bug-id: MRP-338 Reviewed-by: Vitaly Fertman Reviewed-by: Alexey Lyashkov Signed-off-by: Artem Blagodarenko Change-Id: I4105b2aec38c90d3f5a20d1498a563192a74de55 Reviewed-on: http://review.whamcloud.com/2627 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Keith Mannthey Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index ac4414e..c01069a 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -494,6 +494,9 @@ static inline void set_fs(mm_segment_t seg) #define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO) #define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO) +struct inode *igrab(struct inode *inode); +void iput(struct inode *inode); + #include #include #include diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 7a91186..74a8104 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -1515,8 +1515,7 @@ struct lookup_intent; struct md_ops { int (*m_getstatus)(struct obd_export *, struct lu_fid *, struct obd_capa **); - int (*m_change_cbdata)(struct obd_export *, const struct lu_fid *, - ldlm_iterator_t, void *); + int (*m_null_inode)(struct obd_export *, const struct lu_fid *); int (*m_find_cbdata)(struct obd_export *, const struct lu_fid *, ldlm_iterator_t, void *); int (*m_close)(struct obd_export *, struct md_op_data *, diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index c39825c..f710646 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -1870,15 +1870,14 @@ static inline int md_getattr(struct obd_export *exp, struct md_op_data *op_data, RETURN(rc); } -static inline int md_change_cbdata(struct obd_export *exp, - const struct lu_fid *fid, - ldlm_iterator_t it, void *data) +static inline int md_null_inode(struct obd_export *exp, + const struct lu_fid *fid) { int rc; ENTRY; - EXP_CHECK_MD_OP(exp, change_cbdata); - EXP_MD_COUNTER_INCREMENT(exp, change_cbdata); - rc = MDP(exp->exp_obd, change_cbdata)(exp, fid, it, data); + EXP_CHECK_MD_OP(exp, null_inode); + EXP_MD_COUNTER_INCREMENT(exp, null_inode); + rc = MDP(exp->exp_obd, null_inode)(exp, fid); RETURN(rc); } diff --git a/lustre/liblustre/llite_lib.h b/lustre/liblustre/llite_lib.h index c587ad5..8679ad9 100644 --- a/lustre/liblustre/llite_lib.h +++ b/lustre/liblustre/llite_lib.h @@ -295,6 +295,7 @@ int llu_iop_lookup(struct pnode *pnode, struct intent *intnt, const char *path); void unhook_stale_inode(struct pnode *pno); +struct inode *llu_inode_from_resource_lock(struct ldlm_lock *lock); struct inode *llu_inode_from_lock(struct ldlm_lock *lock); int llu_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, diff --git a/lustre/liblustre/namei.c b/lustre/liblustre/namei.c index d43fe75..a2ce417 100644 --- a/lustre/liblustre/namei.c +++ b/lustre/liblustre/namei.c @@ -128,12 +128,16 @@ int llu_md_blocking_ast(struct ldlm_lock *lock, } break; case LDLM_CB_CANCELING: { - struct inode *inode = llu_inode_from_lock(lock); + struct inode *inode = llu_inode_from_resource_lock(lock); struct llu_inode_info *lli; struct intnl_stat *st; __u64 bits = lock->l_policy_data.l_inodebits.bits; struct lu_fid *fid; + /* Inode is set to lock->l_resource->lr_lvb_inode + * for mdc - bug 24555 */ + LASSERT(lock->l_ast_data == NULL); + /* Invalidate all dentries associated with this inode */ if (inode == NULL) break; @@ -380,6 +384,21 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, RETURN(0); } +struct inode *llu_inode_from_resource_lock(struct ldlm_lock *lock) +{ + struct inode *inode; + lock_res_and_lock(lock); + + if (lock->l_resource->lr_lvb_inode) { + inode = (struct inode *)lock->l_resource->lr_lvb_inode; + I_REF(inode); + } else + inode = NULL; + + unlock_res_and_lock(lock); + return inode; +} + struct inode *llu_inode_from_lock(struct ldlm_lock *lock) { struct inode *inode; diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index 2d52736..2db5719 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -516,8 +516,7 @@ void llu_clear_inode(struct inode *inode) inode); lli->lli_flags &= ~LLIF_MDS_SIZE_LOCK; - md_change_cbdata(sbi->ll_md_exp, ll_inode2fid(inode), - null_if_equal, inode); + md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode)); lsm = ccc_inode_lsm_get(inode); if (lsm != NULL) diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 1a8d156..5f21f12 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -191,7 +191,8 @@ static int ll_ddelete(HAVE_D_DELETE_CONST struct dentry *de) /* Disable this piece of code temproarily because this is called * inside dcache_lock so it's not appropriate to do lots of work - * here. */ + * here. ATTENTION: Before this piece of code enabling, LU-2487 must be + * resolved. */ #if 0 /* if not ldlm lock for this inode, set i_nlink to 0 so that * this inode can be recycled later b=20433 */ diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 466d259..a3a514c 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -356,32 +356,43 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash, mode = LCK_PR; rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED, ll_inode2fid(dir), LDLM_IBITS, &policy, mode, &lockh); - if (!rc) { - struct ldlm_enqueue_info einfo = { LDLM_IBITS, mode, - ll_md_blocking_ast, ldlm_completion_ast, - NULL, NULL, dir }; - struct lookup_intent it = { .it_op = IT_READDIR }; - struct ptlrpc_request *request; - struct md_op_data *op_data; - - op_data = ll_prep_md_op_data(NULL, dir, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return (void *)op_data; - - rc = md_enqueue(ll_i2sbi(dir)->ll_md_exp, &einfo, &it, - op_data, &lockh, NULL, 0, NULL, 0); - - ll_finish_md_op_data(op_data); + if (!rc) { + struct ldlm_enqueue_info einfo = {.ei_type = LDLM_IBITS, + .ei_mode = mode, + .ei_cb_bl = + ll_md_blocking_ast, + .ei_cb_cp = + ldlm_completion_ast, + .ei_cb_gl = NULL, + .ei_cb_wg = NULL, + .ei_cbdata = NULL}; + struct lookup_intent it = { .it_op = IT_READDIR }; + struct ptlrpc_request *request; + struct md_op_data *op_data; + + op_data = ll_prep_md_op_data(NULL, dir, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY, NULL); + if (IS_ERR(op_data)) + return (void *)op_data; + + rc = md_enqueue(ll_i2sbi(dir)->ll_md_exp, &einfo, &it, + op_data, &lockh, NULL, 0, NULL, 0); + + ll_finish_md_op_data(op_data); + + request = (struct ptlrpc_request *)it.d.lustre.it_data; + if (request) + ptlrpc_req_finished(request); + if (rc < 0) { + CERROR("lock enqueue: "DFID" at "LPU64": rc %d\n", + PFID(ll_inode2fid(dir)), hash, rc); + return ERR_PTR(rc); + } - request = (struct ptlrpc_request *)it.d.lustre.it_data; - if (request) - ptlrpc_req_finished(request); - if (rc < 0) { - CERROR("lock enqueue: "DFID" at "LPU64": rc %d\n", - PFID(ll_inode2fid(dir)), hash, rc); - return ERR_PTR(rc); - } + CDEBUG(D_INODE, "setting lr_lvb_inode to inode %p (%lu/%u)\n", + dir, dir->i_ino, dir->i_generation); + md_set_lock_data(ll_i2sbi(dir)->ll_md_exp, + &it.d.lustre.it_lock_handle, dir, NULL); } else { /* for cross-ref object, l_ast_data of the lock may not be set, * we reset it here */ diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 81b36ce..d763cf7 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -3247,7 +3247,7 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen) .ei_mode = LCK_CR, .ei_cb_bl = ll_md_blocking_ast, .ei_cb_cp = ldlm_completion_ast, - .ei_cbdata = inode }; + .ei_cbdata = NULL }; int rc; ENTRY; @@ -3310,6 +3310,8 @@ again: ll_finish_md_op_data(op_data); + md_set_lock_data(sbi->ll_md_exp, &it.d.lustre.it_lock_handle, inode, NULL); + mode = it.d.lustre.it_lock_mode; it.d.lustre.it_lock_mode = 0; ll_intent_drop_lock(&it); diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 9329d6c..6d41614 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -834,6 +834,7 @@ void ll_lli_init(struct ll_inode_info *lli); int ll_fill_super(struct super_block *sb, struct vfsmount *mnt); void ll_put_super(struct super_block *sb); void ll_kill_super(struct super_block *sb); +struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock); struct inode *ll_inode_from_lock(struct ldlm_lock *lock); void ll_clear_inode(struct inode *inode); int ll_setattr_raw(struct dentry *dentry, struct iattr *attr); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index ad360f1..9ac70e5 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1124,6 +1124,31 @@ void ll_put_super(struct super_block *sb) EXIT; } /* client_put_super */ +struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock) +{ + struct inode *inode = NULL; + + /* NOTE: we depend on atomic igrab() -bzzz */ + lock_res_and_lock(lock); + if (lock->l_resource->lr_lvb_inode) { + struct ll_inode_info * lli; + lli = ll_i2info(lock->l_resource->lr_lvb_inode); + if (lli->lli_inode_magic == LLI_INODE_MAGIC) { + inode = igrab(lock->l_resource->lr_lvb_inode); + } else { + inode = lock->l_resource->lr_lvb_inode; + LDLM_DEBUG_LIMIT(inode->i_state & I_FREEING ? D_INFO : + D_WARNING, lock, "lr_lvb_inode %p is " + "bogus: magic %08x", + lock->l_resource->lr_lvb_inode, + lli->lli_inode_magic); + inode = NULL; + } + } + unlock_res_and_lock(lock); + return inode; +} + struct inode *ll_inode_from_lock(struct ldlm_lock *lock) { struct inode *inode = NULL; @@ -1146,18 +1171,6 @@ struct inode *ll_inode_from_lock(struct ldlm_lock *lock) return inode; } -static int null_if_equal(struct ldlm_lock *lock, void *data) -{ - if (data == lock->l_ast_data) { - lock->l_ast_data = NULL; - - if (lock->l_req_mode != lock->l_granted_mode) - LDLM_ERROR(lock,"clearing inode with ungranted lock"); - } - - return LDLM_ITER_CONTINUE; -} - void ll_clear_inode(struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); @@ -1175,8 +1188,7 @@ void ll_clear_inode(struct inode *inode) } ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK; - md_change_cbdata(sbi->ll_md_exp, ll_inode2fid(inode), - null_if_equal, inode); + md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode)); LASSERT(!lli->lli_open_fd_write_count); LASSERT(!lli->lli_open_fd_read_count); diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 3bcf3ff..1477790 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -212,12 +212,16 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, } break; case LDLM_CB_CANCELING: { - struct inode *inode = ll_inode_from_lock(lock); + struct inode *inode = ll_inode_from_resource_lock(lock); struct ll_inode_info *lli; __u64 bits = lock->l_policy_data.l_inodebits.bits; struct lu_fid *fid; ldlm_mode_t mode = lock->l_req_mode; + /* Inode is set to lock->l_resource->lr_lvb_inode + * for mdc - bug 24555 */ + LASSERT(lock->l_ast_data == NULL); + /* Invalidate all dentries associated with this inode */ if (inode == NULL) break; diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 4a5e147..ba5962e 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -1325,8 +1325,7 @@ static int lmv_getattr(struct obd_export *exp, struct md_op_data *op_data, RETURN(rc); } -static int lmv_change_cbdata(struct obd_export *exp, const struct lu_fid *fid, - ldlm_iterator_t it, void *data) +static int lmv_null_inode(struct obd_export *exp, const struct lu_fid *fid) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -1348,7 +1347,7 @@ static int lmv_change_cbdata(struct obd_export *exp, const struct lu_fid *fid, for (i = 0; i < lmv->desc.ld_tgt_count; i++) { if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL) continue; - md_change_cbdata(lmv->tgts[i]->ltd_exp, fid, it, data); + md_null_inode(lmv->tgts[i]->ltd_exp, fid); } RETURN(0); @@ -2614,7 +2613,7 @@ struct obd_ops lmv_obd_ops = { struct md_ops lmv_md_ops = { .m_getstatus = lmv_getstatus, - .m_change_cbdata = lmv_change_cbdata, + .m_null_inode = lmv_null_inode, .m_find_cbdata = lmv_find_cbdata, .m_close = lmv_close, .m_create = lmv_create, diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index 13f0501..b6e2846 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -83,8 +83,7 @@ void mdc_exit_request(struct client_obd *cli); int mdc_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data, __u64 *bits); -int mdc_change_cbdata(struct obd_export *exp, const struct lu_fid *fid, - ldlm_iterator_t it, void *data); +int mdc_null_inode(struct obd_export *exp, const struct lu_fid *fid); int mdc_find_cbdata(struct obd_export *exp, const struct lu_fid *fid, ldlm_iterator_t it, void *data); diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index c17feed..1e3b9b1 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -117,7 +117,8 @@ EXPORT_SYMBOL(it_open_error); int mdc_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data, __u64 *bits) { - struct ldlm_lock *lock; + struct ldlm_lock *lock; + struct inode *new_inode = data; ENTRY; if(bits) @@ -131,18 +132,18 @@ int mdc_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data, LASSERT(lock != NULL); lock_res_and_lock(lock); #ifdef __KERNEL__ - if (lock->l_ast_data && lock->l_ast_data != data) { - struct inode *new_inode = data; - struct inode *old_inode = lock->l_ast_data; - LASSERTF(old_inode->i_state & I_FREEING, - "Found existing inode %p/%lu/%u state %lu in lock: " - "setting data to %p/%lu/%u\n", old_inode, - old_inode->i_ino, old_inode->i_generation, - old_inode->i_state, - new_inode, new_inode->i_ino, new_inode->i_generation); - } + if (lock->l_resource->lr_lvb_inode && + lock->l_resource->lr_lvb_inode != data) { + struct inode *old_inode = lock->l_resource->lr_lvb_inode; + LASSERTF(old_inode->i_state & I_FREEING, + "Found existing inode %p/%lu/%u state %lu in lock: " + "setting data to %p/%lu/%u\n", old_inode, + old_inode->i_ino, old_inode->i_generation, + old_inode->i_state, + new_inode, new_inode->i_ino, new_inode->i_generation); + } #endif - lock->l_ast_data = data; + lock->l_resource->lr_lvb_inode = new_inode; if (bits) *bits = lock->l_policy_data.l_inodebits.bits; @@ -186,19 +187,28 @@ int mdc_cancel_unused(struct obd_export *exp, RETURN(rc); } -int mdc_change_cbdata(struct obd_export *exp, - const struct lu_fid *fid, - ldlm_iterator_t it, void *data) +int mdc_null_inode(struct obd_export *exp, + const struct lu_fid *fid) { - struct ldlm_res_id res_id; - ENTRY; + struct ldlm_res_id res_id; + struct ldlm_resource *res; + struct ldlm_namespace *ns = class_exp2obd(exp)->obd_namespace; + ENTRY; - fid_build_reg_res_name(fid, &res_id); - ldlm_resource_iterate(class_exp2obd(exp)->obd_namespace, - &res_id, it, data); + LASSERTF(ns != NULL, "no namespace passed\n"); - EXIT; - return 0; + fid_build_reg_res_name(fid, &res_id); + + res = ldlm_resource_get(ns, NULL, &res_id, 0, 0); + if(res == NULL) + RETURN(0); + + lock_res(res); + res->lr_lvb_inode = NULL; + unlock_res(res); + + ldlm_resource_putref(res); + RETURN(0); } /* find any ldlm lock of the inode in mdc diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 399c268..1a30a0b 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -2395,6 +2395,18 @@ static int mdc_cancel_for_recovery(struct ldlm_lock *lock) RETURN(1); } +static int mdc_resource_inode_free(struct ldlm_resource *res) +{ + if (res->lr_lvb_inode) + res->lr_lvb_inode = NULL; + + return 0; +} + +struct ldlm_valblock_ops inode_lvbo = { + lvbo_free: mdc_resource_inode_free +}; + static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) { struct client_obd *cli = &obd->u.cli; @@ -2424,6 +2436,8 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) ns_register_cancel(obd->obd_namespace, mdc_cancel_for_recovery); + obd->obd_namespace->ns_lvbo = &inode_lvbo; + rc = obd_llog_init(obd, &obd->obd_olg, obd, NULL); if (rc) { mdc_cleanup(obd); @@ -2699,7 +2713,7 @@ struct obd_ops mdc_obd_ops = { struct md_ops mdc_md_ops = { .m_getstatus = mdc_getstatus, - .m_change_cbdata = mdc_change_cbdata, + .m_null_inode = mdc_null_inode, .m_find_cbdata = mdc_find_cbdata, .m_close = mdc_close, .m_create = mdc_create, diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index e1227d9..8aeb3c3 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -1788,7 +1788,7 @@ do { \ void lprocfs_init_mps_stats(int num_private_stats, struct lprocfs_stats *stats) { LPROCFS_MD_OP_INIT(num_private_stats, stats, getstatus); - LPROCFS_MD_OP_INIT(num_private_stats, stats, change_cbdata); + LPROCFS_MD_OP_INIT(num_private_stats, stats, null_inode); LPROCFS_MD_OP_INIT(num_private_stats, stats, find_cbdata); LPROCFS_MD_OP_INIT(num_private_stats, stats, close); LPROCFS_MD_OP_INIT(num_private_stats, stats, create);