Whamcloud - gitweb
LU-903 ldlm: inode references moved to resource
authorArtem Blagodarenko <artem_blagodarenko@xyratex.com>
Wed, 2 May 2012 14:05:33 +0000 (18:05 +0400)
committerOleg Drokin <oleg.drokin@intel.com>
Thu, 7 Mar 2013 03:20:55 +0000 (22:20 -0500)
There is a race condition while get_attr after cancel_lru_locks
and sysctl drop_caches. ll_clear_inode clears l_ast_data for
ldlm lock and this lock can't be canceled because "inode == NULL".
ll_mdc_blocking_ast finds such lock. As result DCACHE_LUSTRE_INVALID
is not set and lookup returns wrong inode.

This patch moves inode structure reference to
"ldlm_lock::l_resource::lr_lvb_inode". This prevents from different
inode references for same resource's lock.

Xyratex-bug-id: MRP-338
Reviewed-by: Vitaly Fertman <vitaly_fertman@xyratex.com>
Reviewed-by: Alexey Lyashkov <alexey_lyashkov@xyratex.com>
Signed-off-by: Artem Blagodarenko <artem_blagodarenko@xyratex.com>
Change-Id: I4105b2aec38c90d3f5a20d1498a563192a74de55
Reviewed-on: http://review.whamcloud.com/2627
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Keith Mannthey <keith.mannthey@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
17 files changed:
lustre/include/liblustre.h
lustre/include/obd.h
lustre/include/obd_class.h
lustre/liblustre/llite_lib.h
lustre/liblustre/namei.c
lustre/liblustre/super.c
lustre/llite/dcache.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/namei.c
lustre/lmv/lmv_obd.c
lustre/mdc/mdc_internal.h
lustre/mdc/mdc_locks.c
lustre/mdc/mdc_request.c
lustre/obdclass/lprocfs_status.c

index ac4414e..c01069a 100644 (file)
@@ -494,6 +494,9 @@ static inline void set_fs(mm_segment_t seg)
 #define S_IRWXUGO       (S_IRWXU|S_IRWXG|S_IRWXO)
 #define S_IALLUGO       (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
 
+struct inode *igrab(struct inode *inode);
+void iput(struct inode *inode);
+
 #include <obd_support.h>
 #include <lustre/lustre_idl.h>
 #include <lustre_lib.h>
index 7a91186..74a8104 100644 (file)
@@ -1515,8 +1515,7 @@ struct lookup_intent;
 struct md_ops {
         int (*m_getstatus)(struct obd_export *, struct lu_fid *,
                            struct obd_capa **);
-        int (*m_change_cbdata)(struct obd_export *, const struct lu_fid *,
-                               ldlm_iterator_t, void *);
+        int (*m_null_inode)(struct obd_export *, const struct lu_fid *);
         int (*m_find_cbdata)(struct obd_export *, const struct lu_fid *,
                              ldlm_iterator_t, void *);
         int (*m_close)(struct obd_export *, struct md_op_data *,
index c39825c..f710646 100644 (file)
@@ -1870,15 +1870,14 @@ static inline int md_getattr(struct obd_export *exp, struct md_op_data *op_data,
         RETURN(rc);
 }
 
-static inline int md_change_cbdata(struct obd_export *exp,
-                                   const struct lu_fid *fid,
-                                   ldlm_iterator_t it, void *data)
+static inline int md_null_inode(struct obd_export *exp,
+                                   const struct lu_fid *fid)
 {
         int rc;
         ENTRY;
-        EXP_CHECK_MD_OP(exp, change_cbdata);
-        EXP_MD_COUNTER_INCREMENT(exp, change_cbdata);
-        rc = MDP(exp->exp_obd, change_cbdata)(exp, fid, it, data);
+        EXP_CHECK_MD_OP(exp, null_inode);
+        EXP_MD_COUNTER_INCREMENT(exp, null_inode);
+        rc = MDP(exp->exp_obd, null_inode)(exp, fid);
         RETURN(rc);
 }
 
index c587ad5..8679ad9 100644 (file)
@@ -295,6 +295,7 @@ int llu_iop_lookup(struct pnode *pnode,
                    struct intent *intnt,
                    const char *path);
 void unhook_stale_inode(struct pnode *pno);
+struct inode *llu_inode_from_resource_lock(struct ldlm_lock *lock);
 struct inode *llu_inode_from_lock(struct ldlm_lock *lock);
 int llu_md_blocking_ast(struct ldlm_lock *lock,
                         struct ldlm_lock_desc *desc,
index d43fe75..a2ce417 100644 (file)
@@ -128,12 +128,16 @@ int llu_md_blocking_ast(struct ldlm_lock *lock,
                 }
                 break;
         case LDLM_CB_CANCELING: {
-                struct inode *inode = llu_inode_from_lock(lock);
+               struct inode *inode = llu_inode_from_resource_lock(lock);
                 struct llu_inode_info *lli;
                 struct intnl_stat *st;
                 __u64 bits = lock->l_policy_data.l_inodebits.bits;
                 struct lu_fid *fid;
 
+               /* Inode is set to lock->l_resource->lr_lvb_inode
+               * for mdc - bug 24555 */
+               LASSERT(lock->l_ast_data == NULL);
+
                 /* Invalidate all dentries associated with this inode */
                 if (inode == NULL)
                         break;
@@ -380,6 +384,21 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset,
         RETURN(0);
 }
 
+struct inode *llu_inode_from_resource_lock(struct ldlm_lock *lock)
+{
+       struct inode *inode;
+       lock_res_and_lock(lock);
+
+       if (lock->l_resource->lr_lvb_inode) {
+               inode = (struct inode *)lock->l_resource->lr_lvb_inode;
+               I_REF(inode);
+       } else
+               inode = NULL;
+
+       unlock_res_and_lock(lock);
+       return inode;
+}
+
 struct inode *llu_inode_from_lock(struct ldlm_lock *lock)
 {
         struct inode *inode;
index 2d52736..2db5719 100644 (file)
@@ -516,8 +516,7 @@ void llu_clear_inode(struct inode *inode)
                inode);
 
         lli->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
-        md_change_cbdata(sbi->ll_md_exp, ll_inode2fid(inode),
-                         null_if_equal, inode);
+       md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode));
 
        lsm = ccc_inode_lsm_get(inode);
        if (lsm != NULL)
index 1a8d156..5f21f12 100644 (file)
@@ -191,7 +191,8 @@ static int ll_ddelete(HAVE_D_DELETE_CONST struct dentry *de)
 
        /* Disable this piece of code temproarily because this is called
         * inside dcache_lock so it's not appropriate to do lots of work
-        * here. */
+        * here. ATTENTION: Before this piece of code enabling, LU-2487 must be
+        * resolved. */
 #if 0
        /* if not ldlm lock for this inode, set i_nlink to 0 so that
         * this inode can be recycled later b=20433 */
index 466d259..a3a514c 100644 (file)
@@ -356,32 +356,43 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
         mode = LCK_PR;
         rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
                            ll_inode2fid(dir), LDLM_IBITS, &policy, mode, &lockh);
-        if (!rc) {
-                struct ldlm_enqueue_info einfo = { LDLM_IBITS, mode,
-                       ll_md_blocking_ast, ldlm_completion_ast,
-                       NULL, NULL, dir };
-                struct lookup_intent it = { .it_op = IT_READDIR };
-                struct ptlrpc_request *request;
-                struct md_op_data *op_data;
-
-                op_data = ll_prep_md_op_data(NULL, dir, NULL, NULL, 0, 0,
-                                             LUSTRE_OPC_ANY, NULL);
-                if (IS_ERR(op_data))
-                        return (void *)op_data;
-
-                rc = md_enqueue(ll_i2sbi(dir)->ll_md_exp, &einfo, &it,
-                                op_data, &lockh, NULL, 0, NULL, 0);
-
-                ll_finish_md_op_data(op_data);
+       if (!rc) {
+               struct ldlm_enqueue_info einfo = {.ei_type = LDLM_IBITS,
+                                                 .ei_mode = mode,
+                                                 .ei_cb_bl =
+                                                 ll_md_blocking_ast,
+                                                 .ei_cb_cp =
+                                                 ldlm_completion_ast,
+                                                 .ei_cb_gl = NULL,
+                                                 .ei_cb_wg = NULL,
+                                                 .ei_cbdata = NULL};
+               struct lookup_intent it = { .it_op = IT_READDIR };
+               struct ptlrpc_request *request;
+               struct md_op_data *op_data;
+
+               op_data = ll_prep_md_op_data(NULL, dir, NULL, NULL, 0, 0,
+               LUSTRE_OPC_ANY, NULL);
+               if (IS_ERR(op_data))
+                       return (void *)op_data;
+
+               rc = md_enqueue(ll_i2sbi(dir)->ll_md_exp, &einfo, &it,
+                               op_data, &lockh, NULL, 0, NULL, 0);
+
+               ll_finish_md_op_data(op_data);
+
+               request = (struct ptlrpc_request *)it.d.lustre.it_data;
+               if (request)
+                       ptlrpc_req_finished(request);
+               if (rc < 0) {
+                       CERROR("lock enqueue: "DFID" at "LPU64": rc %d\n",
+                               PFID(ll_inode2fid(dir)), hash, rc);
+                       return ERR_PTR(rc);
+               }
 
-                request = (struct ptlrpc_request *)it.d.lustre.it_data;
-                if (request)
-                        ptlrpc_req_finished(request);
-                if (rc < 0) {
-                        CERROR("lock enqueue: "DFID" at "LPU64": rc %d\n",
-                               PFID(ll_inode2fid(dir)), hash, rc);
-                        return ERR_PTR(rc);
-                }
+               CDEBUG(D_INODE, "setting lr_lvb_inode to inode %p (%lu/%u)\n",
+                      dir, dir->i_ino, dir->i_generation);
+               md_set_lock_data(ll_i2sbi(dir)->ll_md_exp,
+                                &it.d.lustre.it_lock_handle, dir, NULL);
         } else {
                 /* for cross-ref object, l_ast_data of the lock may not be set,
                  * we reset it here */
index 81b36ce..d763cf7 100644 (file)
@@ -3247,7 +3247,7 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen)
                                           .ei_mode = LCK_CR,
                                           .ei_cb_bl = ll_md_blocking_ast,
                                           .ei_cb_cp = ldlm_completion_ast,
-                                          .ei_cbdata = inode };
+                                          .ei_cbdata = NULL };
        int rc;
        ENTRY;
 
@@ -3310,6 +3310,8 @@ again:
 
        ll_finish_md_op_data(op_data);
 
+       md_set_lock_data(sbi->ll_md_exp, &it.d.lustre.it_lock_handle, inode, NULL);
+
        mode = it.d.lustre.it_lock_mode;
        it.d.lustre.it_lock_mode = 0;
        ll_intent_drop_lock(&it);
index 9329d6c..6d41614 100644 (file)
@@ -834,6 +834,7 @@ void ll_lli_init(struct ll_inode_info *lli);
 int ll_fill_super(struct super_block *sb, struct vfsmount *mnt);
 void ll_put_super(struct super_block *sb);
 void ll_kill_super(struct super_block *sb);
+struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock);
 struct inode *ll_inode_from_lock(struct ldlm_lock *lock);
 void ll_clear_inode(struct inode *inode);
 int ll_setattr_raw(struct dentry *dentry, struct iattr *attr);
index ad360f1..9ac70e5 100644 (file)
@@ -1124,6 +1124,31 @@ void ll_put_super(struct super_block *sb)
         EXIT;
 } /* client_put_super */
 
+struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock)
+{
+       struct inode *inode = NULL;
+
+       /* NOTE: we depend on atomic igrab() -bzzz */
+       lock_res_and_lock(lock);
+       if (lock->l_resource->lr_lvb_inode) {
+               struct ll_inode_info * lli;
+               lli = ll_i2info(lock->l_resource->lr_lvb_inode);
+               if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
+                       inode = igrab(lock->l_resource->lr_lvb_inode);
+               } else {
+                       inode = lock->l_resource->lr_lvb_inode;
+                       LDLM_DEBUG_LIMIT(inode->i_state & I_FREEING ?  D_INFO :
+                                        D_WARNING, lock, "lr_lvb_inode %p is "
+                                        "bogus: magic %08x",
+                                        lock->l_resource->lr_lvb_inode,
+                                        lli->lli_inode_magic);
+                       inode = NULL;
+               }
+       }
+       unlock_res_and_lock(lock);
+       return inode;
+}
+
 struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
 {
         struct inode *inode = NULL;
@@ -1146,18 +1171,6 @@ struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
         return inode;
 }
 
-static int null_if_equal(struct ldlm_lock *lock, void *data)
-{
-        if (data == lock->l_ast_data) {
-                lock->l_ast_data = NULL;
-
-                if (lock->l_req_mode != lock->l_granted_mode)
-                        LDLM_ERROR(lock,"clearing inode with ungranted lock");
-        }
-
-        return LDLM_ITER_CONTINUE;
-}
-
 void ll_clear_inode(struct inode *inode)
 {
         struct ll_inode_info *lli = ll_i2info(inode);
@@ -1175,8 +1188,7 @@ void ll_clear_inode(struct inode *inode)
         }
 
         ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
-        md_change_cbdata(sbi->ll_md_exp, ll_inode2fid(inode),
-                         null_if_equal, inode);
+       md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode));
 
         LASSERT(!lli->lli_open_fd_write_count);
         LASSERT(!lli->lli_open_fd_read_count);
index 3bcf3ff..1477790 100644 (file)
@@ -212,12 +212,16 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                 }
                 break;
         case LDLM_CB_CANCELING: {
-                struct inode *inode = ll_inode_from_lock(lock);
+               struct inode *inode = ll_inode_from_resource_lock(lock);
                 struct ll_inode_info *lli;
                 __u64 bits = lock->l_policy_data.l_inodebits.bits;
                 struct lu_fid *fid;
                 ldlm_mode_t mode = lock->l_req_mode;
 
+               /* Inode is set to lock->l_resource->lr_lvb_inode
+                * for mdc - bug 24555 */
+               LASSERT(lock->l_ast_data == NULL);
+
                 /* Invalidate all dentries associated with this inode */
                 if (inode == NULL)
                         break;
index 4a5e147..ba5962e 100644 (file)
@@ -1325,8 +1325,7 @@ static int lmv_getattr(struct obd_export *exp, struct md_op_data *op_data,
         RETURN(rc);
 }
 
-static int lmv_change_cbdata(struct obd_export *exp, const struct lu_fid *fid,
-                             ldlm_iterator_t it, void *data)
+static int lmv_null_inode(struct obd_export *exp, const struct lu_fid *fid)
 {
         struct obd_device   *obd = exp->exp_obd;
         struct lmv_obd      *lmv = &obd->u.lmv;
@@ -1348,7 +1347,7 @@ static int lmv_change_cbdata(struct obd_export *exp, const struct lu_fid *fid,
        for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
                if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL)
                        continue;
-               md_change_cbdata(lmv->tgts[i]->ltd_exp, fid, it, data);
+               md_null_inode(lmv->tgts[i]->ltd_exp, fid);
        }
 
        RETURN(0);
@@ -2614,7 +2613,7 @@ struct obd_ops lmv_obd_ops = {
 
 struct md_ops lmv_md_ops = {
         .m_getstatus            = lmv_getstatus,
-        .m_change_cbdata        = lmv_change_cbdata,
+        .m_null_inode          = lmv_null_inode,
         .m_find_cbdata          = lmv_find_cbdata,
         .m_close                = lmv_close,
         .m_create               = lmv_create,
index 13f0501..b6e2846 100644 (file)
@@ -83,8 +83,7 @@ void mdc_exit_request(struct client_obd *cli);
 int mdc_set_lock_data(struct obd_export *exp,
                       __u64 *lockh, void *data, __u64 *bits);
 
-int mdc_change_cbdata(struct obd_export *exp, const struct lu_fid *fid,
-                      ldlm_iterator_t it, void *data);
+int mdc_null_inode(struct obd_export *exp, const struct lu_fid *fid);
 
 int mdc_find_cbdata(struct obd_export *exp, const struct lu_fid *fid,
                     ldlm_iterator_t it, void *data);
index c17feed..1e3b9b1 100644 (file)
@@ -117,7 +117,8 @@ EXPORT_SYMBOL(it_open_error);
 int mdc_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data,
                       __u64 *bits)
 {
-        struct ldlm_lock *lock;
+       struct ldlm_lock *lock;
+       struct inode *new_inode = data;
         ENTRY;
 
         if(bits)
@@ -131,18 +132,18 @@ int mdc_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data,
         LASSERT(lock != NULL);
         lock_res_and_lock(lock);
 #ifdef __KERNEL__
-        if (lock->l_ast_data && lock->l_ast_data != data) {
-                struct inode *new_inode = data;
-                struct inode *old_inode = lock->l_ast_data;
-                LASSERTF(old_inode->i_state & I_FREEING,
-                         "Found existing inode %p/%lu/%u state %lu in lock: "
-                         "setting data to %p/%lu/%u\n", old_inode,
-                         old_inode->i_ino, old_inode->i_generation,
-                         old_inode->i_state,
-                         new_inode, new_inode->i_ino, new_inode->i_generation);
-        }
+       if (lock->l_resource->lr_lvb_inode &&
+           lock->l_resource->lr_lvb_inode != data) {
+               struct inode *old_inode = lock->l_resource->lr_lvb_inode;
+               LASSERTF(old_inode->i_state & I_FREEING,
+                        "Found existing inode %p/%lu/%u state %lu in lock: "
+                        "setting data to %p/%lu/%u\n", old_inode,
+                        old_inode->i_ino, old_inode->i_generation,
+                        old_inode->i_state,
+                        new_inode, new_inode->i_ino, new_inode->i_generation);
+       }
 #endif
-        lock->l_ast_data = data;
+       lock->l_resource->lr_lvb_inode = new_inode;
         if (bits)
                 *bits = lock->l_policy_data.l_inodebits.bits;
 
@@ -186,19 +187,28 @@ int mdc_cancel_unused(struct obd_export *exp,
         RETURN(rc);
 }
 
-int mdc_change_cbdata(struct obd_export *exp,
-                      const struct lu_fid *fid,
-                      ldlm_iterator_t it, void *data)
+int mdc_null_inode(struct obd_export *exp,
+                  const struct lu_fid *fid)
 {
-        struct ldlm_res_id res_id;
-        ENTRY;
+       struct ldlm_res_id res_id;
+       struct ldlm_resource *res;
+       struct ldlm_namespace *ns = class_exp2obd(exp)->obd_namespace;
+       ENTRY;
 
-        fid_build_reg_res_name(fid, &res_id);
-        ldlm_resource_iterate(class_exp2obd(exp)->obd_namespace,
-                              &res_id, it, data);
+       LASSERTF(ns != NULL, "no namespace passed\n");
 
-        EXIT;
-        return 0;
+       fid_build_reg_res_name(fid, &res_id);
+
+       res = ldlm_resource_get(ns, NULL, &res_id, 0, 0);
+       if(res == NULL)
+               RETURN(0);
+
+       lock_res(res);
+       res->lr_lvb_inode = NULL;
+       unlock_res(res);
+
+       ldlm_resource_putref(res);
+       RETURN(0);
 }
 
 /* find any ldlm lock of the inode in mdc
index 399c268..1a30a0b 100644 (file)
@@ -2395,6 +2395,18 @@ static int mdc_cancel_for_recovery(struct ldlm_lock *lock)
         RETURN(1);
 }
 
+static int mdc_resource_inode_free(struct ldlm_resource *res)
+{
+       if (res->lr_lvb_inode)
+               res->lr_lvb_inode = NULL;
+
+       return 0;
+}
+
+struct ldlm_valblock_ops inode_lvbo = {
+       lvbo_free: mdc_resource_inode_free
+};
+
 static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 {
         struct client_obd *cli = &obd->u.cli;
@@ -2424,6 +2436,8 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 
         ns_register_cancel(obd->obd_namespace, mdc_cancel_for_recovery);
 
+       obd->obd_namespace->ns_lvbo = &inode_lvbo;
+
         rc = obd_llog_init(obd, &obd->obd_olg, obd, NULL);
         if (rc) {
                 mdc_cleanup(obd);
@@ -2699,7 +2713,7 @@ struct obd_ops mdc_obd_ops = {
 
 struct md_ops mdc_md_ops = {
         .m_getstatus        = mdc_getstatus,
-        .m_change_cbdata    = mdc_change_cbdata,
+        .m_null_inode      = mdc_null_inode,
         .m_find_cbdata      = mdc_find_cbdata,
         .m_close            = mdc_close,
         .m_create           = mdc_create,
index e1227d9..8aeb3c3 100644 (file)
@@ -1788,7 +1788,7 @@ do {                                                                    \
 void lprocfs_init_mps_stats(int num_private_stats, struct lprocfs_stats *stats)
 {
         LPROCFS_MD_OP_INIT(num_private_stats, stats, getstatus);
-        LPROCFS_MD_OP_INIT(num_private_stats, stats, change_cbdata);
+        LPROCFS_MD_OP_INIT(num_private_stats, stats, null_inode);
         LPROCFS_MD_OP_INIT(num_private_stats, stats, find_cbdata);
         LPROCFS_MD_OP_INIT(num_private_stats, stats, close);
         LPROCFS_MD_OP_INIT(num_private_stats, stats, create);