Whamcloud - gitweb
LU-7531 osp: allow few requests during recovery 39/17539/3
authorDi Wang <di.wang@intel.com>
Tue, 8 Dec 2015 17:28:14 +0000 (09:28 -0800)
committerOleg Drokin <oleg.drokin@intel.com>
Mon, 21 Dec 2015 12:41:27 +0000 (12:41 +0000)
Allow OSP requests during recovery so that recovery
threads are not blocked when the remote target is
also in recovery; otherwise it might cause a
deadlock.

Add replay-single.sh test 117 to verify this case.

Signed-off-by: Di Wang <di.wang@intel.com>
Change-Id: Iad3b6fd382d76c9bc042096c51cfac0a0d33091d
Reviewed-on: http://review.whamcloud.com/17539
Tested-by: Jenkins
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_request.c
lustre/osp/osp_md_object.c
lustre/osp/osp_object.c
lustre/tests/replay-single.sh

index dd518e5..13e3e59 100644 (file)
@@ -2447,14 +2447,19 @@ static int target_start_recovery_thread(struct lu_target *lut,
        struct obd_device *obd = lut->lut_obd;
        int rc = 0;
        struct target_recovery_data *trd = &obd->obd_recovery_data;
        struct obd_device *obd = lut->lut_obd;
        int rc = 0;
        struct target_recovery_data *trd = &obd->obd_recovery_data;
+       int index;
 
        memset(trd, 0, sizeof(*trd));
        init_completion(&trd->trd_starting);
        init_completion(&trd->trd_finishing);
        trd->trd_recovery_handler = handler;
 
 
        memset(trd, 0, sizeof(*trd));
        init_completion(&trd->trd_starting);
        init_completion(&trd->trd_finishing);
        trd->trd_recovery_handler = handler;
 
+       rc = server_name2index(obd->obd_name, &index, NULL);
+       if (rc < 0)
+               return rc;
+
        if (!IS_ERR(kthread_run(target_recovery_thread,
        if (!IS_ERR(kthread_run(target_recovery_thread,
-                               lut, "tgt_recov"))) {
+                               lut, "tgt_recover_%d", index))) {
                wait_for_completion(&trd->trd_starting);
                LASSERT(obd->obd_recovering != 0);
        } else {
                wait_for_completion(&trd->trd_starting);
                LASSERT(obd->obd_recovering != 0);
        } else {
index b0a99ce..8c7b8fa 100644 (file)
@@ -1224,6 +1224,21 @@ int ldlm_cli_cancel_req(struct obd_export *exp, struct list_head *cancels,
                         GOTO(out, rc);
                 }
 
                         GOTO(out, rc);
                 }
 
+               /* If OSP wants to cancel a cross-MDT lock, do not block it
+                * during recovery; otherwise the lock will not be released,
+                * and if the remote target is also in recovery and needs
+                * this lock too, it might cause a deadlock. */
+               if (exp_connect_flags(exp) & OBD_CONNECT_MDS_MDS &&
+                   exp->exp_obd->obd_lu_dev != NULL &&
+                   exp->exp_obd->obd_lu_dev->ld_site != NULL) {
+                       struct lu_device *top_dev;
+
+                       top_dev = exp->exp_obd->obd_lu_dev->ld_site->ls_top_dev;
+                       if (top_dev != NULL &&
+                           top_dev->ld_obd->obd_recovering)
+                               req->rq_allow_replay = 1;
+               }
+
                 req->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL;
                 req->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL;
                 ptlrpc_at_set_req_timeout(req);
                 req->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL;
                 req->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL;
                 ptlrpc_at_set_req_timeout(req);
index 69c815c..643e31a 100644 (file)
@@ -885,6 +885,7 @@ static int osp_md_object_lock(const struct lu_env *env,
        struct ldlm_res_id      *res_id;
        struct dt_device        *dt_dev = lu2dt_dev(dt->do_lu.lo_dev);
        struct osp_device       *osp = dt2osp_dev(dt_dev);
        struct ldlm_res_id      *res_id;
        struct dt_device        *dt_dev = lu2dt_dev(dt->do_lu.lo_dev);
        struct osp_device       *osp = dt2osp_dev(dt_dev);
+       struct lu_device        *top_device;
        struct ptlrpc_request   *req;
        int                     rc = 0;
        __u64                   flags = 0;
        struct ptlrpc_request   *req;
        int                     rc = 0;
        __u64                   flags = 0;
@@ -907,6 +908,15 @@ static int osp_md_object_lock(const struct lu_env *env,
        if (IS_ERR(req))
                RETURN(PTR_ERR(req));
 
        if (IS_ERR(req))
                RETURN(PTR_ERR(req));
 
+       /* During recovery, let OSP send the enqueue request
+        * without checking the recovering status: the other
+        * target may be recovering at the same time, and if
+        * we wait here for the import to be recovered, it
+        * might cause a deadlock. */
+       top_device = dt_dev->dd_lu_dev.ld_site->ls_top_dev;
+       if (top_device->ld_obd->obd_recovering)
+               req->rq_allow_replay = 1;
+
        rc = ldlm_cli_enqueue(osp->opd_exp, &req, einfo, res_id,
                              (const union ldlm_policy_data *)policy,
                              &flags, NULL, 0, LVB_T_NONE, lh, 0);
        rc = ldlm_cli_enqueue(osp->opd_exp, &req, einfo, res_id,
                              (const union ldlm_policy_data *)policy,
                              &flags, NULL, 0, LVB_T_NONE, lh, 0);
index 033044d..c3d8408 100644 (file)
@@ -1690,6 +1690,7 @@ static int osp_it_fetch(const struct lu_env *env, struct osp_it *it)
        struct lu_device         *dev   = it->ooi_obj->do_lu.lo_dev;
        struct osp_device        *osp   = lu2osp_dev(dev);
        struct page             **pages;
        struct lu_device         *dev   = it->ooi_obj->do_lu.lo_dev;
        struct osp_device        *osp   = lu2osp_dev(dev);
        struct page             **pages;
+       struct lu_device *top_device;
        struct ptlrpc_request    *req   = NULL;
        struct ptlrpc_bulk_desc  *desc;
        struct idx_info          *ii;
        struct ptlrpc_request    *req   = NULL;
        struct ptlrpc_bulk_desc  *desc;
        struct idx_info          *ii;
@@ -1725,6 +1726,13 @@ static int osp_it_fetch(const struct lu_env *env, struct osp_it *it)
                RETURN(rc);
        }
 
                RETURN(rc);
        }
 
+       /* Allow this request during recovery; otherwise, if
+        * the remote target is also in recovery status, it
+        * might cause a deadlock. */
+       top_device = dev->ld_site->ls_top_dev;
+       if (top_device->ld_obd->obd_recovering)
+               req->rq_allow_replay = 1;
+
        req->rq_request_portal = OUT_PORTAL;
        ii = req_capsule_client_get(&req->rq_pill, &RMF_IDX_INFO);
        memset(ii, 0, sizeof(*ii));
        req->rq_request_portal = OUT_PORTAL;
        ii = req_capsule_client_get(&req->rq_pill, &RMF_IDX_INFO);
        memset(ii, 0, sizeof(*ii));
index 7855b67..a775227 100755 (executable)
@@ -4294,6 +4294,41 @@ test_116b() {
 }
 run_test 116b "large update log slave MDT recovery"
 
 }
 run_test 116b "large update log slave MDT recovery"
 
+test_117() { # LU-7531: recovery must not deadlock when all MDTs recover at once
+       [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0
+       ([ $FAILURE_MODE == "HARD" ] &&
+               [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+               skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+               return 0 # HARD mode: skip when mds1 and mds2 share a host
+       local index
+       local mds_indexs
+
+       mkdir -p $DIR/$tdir
+       $LFS setdirstripe -i0 -c$MDSCOUNT $DIR/$tdir/remote_dir # -i = starting MDT index, -c = stripe count (per lfs setdirstripe)
+       $LFS setdirstripe -i1 -c$MDSCOUNT $DIR/$tdir/remote_dir_1
+       sleep 2
+
+       # Set rdonly on all MDTs, so that the client sends replay
+       # requests to all MDTs and the MDTs replay these requests
+       # at the same time. This test verifies that recovery does
+       # not deadlock in this case, LU-7531.
+       for ((index = 0; index < $((MDSCOUNT)); index++)); do
+               replay_barrier mds$((index + 1)) # facets are named mds1..mds$MDSCOUNT
+               if [ -z $mds_indexs ]; then
+                       mds_indexs="${mds_indexs}mds$((index+1))" # first entry: no leading comma
+               else
+                       mds_indexs="${mds_indexs},mds$((index+1))" # later entries are comma-separated
+               fi
+       done
+
+       rm -rf $DIR/$tdir/remote_dir # unlink the striped dirs after the barriers are set
+       rm -rf $DIR/$tdir/remote_dir_1
+
+       fail $mds_indexs # fail every MDS facet collected above in one call
+
+       rm -rf $DIR/$tdir || error "rmdir failed"
+}
+run_test 117 "DNE: cross MDT unlink, fail MDT1 and MDT2"
 
 complete $SECONDS
 check_and_cleanup_lustre
 
 complete $SECONDS
 check_and_cleanup_lustre