Whamcloud - gitweb
LU-11967 mdt: reint layout_change in standard way 65/35465/6
authorLai Siyao <lai.siyao@whamcloud.com>
Sun, 30 Jun 2019 15:26:11 +0000 (23:26 +0800)
committerOleg Drokin <green@whamcloud.com>
Fri, 4 Oct 2019 03:37:55 +0000 (03:37 +0000)
Layout_change is a reint operation, and it should be handled the
same as other reint operations, so that resent and replay can
work correctly.

Also replace the lock passed in ldlm_handle_enqueue0() with the
lock taken in mdt_layout_change(). This avoids taking the lock again
in ldlm_handle_enqueue0(), and also makes replay easier. Note,
before replacing, the mode is downgraded from EX to CR, because the
client only needs this mode, and this avoids an unnecessary lock
cancel later.

Add missing resent reconstructor for REINT_RESYNC.

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I328044dacbf18d03232c9bbb51271f6202e9b939
Reviewed-on: https://review.whamcloud.com/35465
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ldlm/ldlm_lock.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_open.c
lustre/mdt/mdt_recovery.c
lustre/mdt/mdt_reint.c

index 3e64224..c3f7577 100644 (file)
@@ -2643,7 +2643,7 @@ int ldlm_export_cancel_locks(struct obd_export *exp)
 }
 
 /**
- * Downgrade an PW/EX lock to COS mode.
+ * Downgrade an PW/EX lock to COS | CR mode.
  *
  * A lock mode convertion from PW/EX mode to less conflict mode. The
  * convertion may fail if lock was canceled before downgrade, but it doesn't
@@ -2655,6 +2655,8 @@ int ldlm_export_cancel_locks(struct obd_export *exp)
  * things are cleared, so any pending or new blocked lock on that lock will
  * cause new call to blocking_ast and force resource object commit.
  *
+ * Also used by layout_change to replace EX lock to CR lock.
+ *
  * \param lock A lock to convert
  * \param new_mode new lock mode
  */
@@ -2663,7 +2665,7 @@ void ldlm_lock_mode_downgrade(struct ldlm_lock *lock, enum ldlm_mode new_mode)
 #ifdef HAVE_SERVER_SUPPORT
        ENTRY;
 
-       LASSERT(new_mode == LCK_COS);
+       LASSERT(new_mode == LCK_COS || new_mode == LCK_CR);
 
        lock_res_and_lock(lock);
 
index f74b3fc..7603903 100644 (file)
@@ -1464,42 +1464,54 @@ out:
 /**
  * Handler of layout intent RPC requiring the layout modification
  *
- * \param[in] info     thread environment
- * \param[in] obj      object
- * \param[in] layout   layout change descriptor
+ * \param[in]  info    thread environment
+ * \param[in]  obj     object
+ * \param[out] lhc     object ldlm lock handle
+ * \param[in]  layout  layout change descriptor
  *
  * \retval 0   on success
  * \retval < 0 error code
  */
 int mdt_layout_change(struct mdt_thread_info *info, struct mdt_object *obj,
+                     struct mdt_lock_handle *lhc,
                      struct md_layout_change *layout)
 {
-       struct mdt_lock_handle *lh = &info->mti_lh[MDT_LH_LOCAL];
        int rc;
+
        ENTRY;
 
        if (!mdt_object_exists(obj))
-               GOTO(out, rc = -ENOENT);
+               RETURN(-ENOENT);
 
        if (!S_ISREG(lu_object_attr(&obj->mot_obj)))
-               GOTO(out, rc = -EINVAL);
+               RETURN(-EINVAL);
 
        rc = mo_permission(info->mti_env, NULL, mdt_object_child(obj), NULL,
                           MAY_WRITE);
        if (rc)
-               GOTO(out, rc);
+               RETURN(rc);
 
-       /* take layout lock to prepare layout change */
-       mdt_lock_reg_init(lh, LCK_EX);
-       rc = mdt_object_lock(info, obj, lh, MDS_INODELOCK_LAYOUT);
-       if (rc)
-               GOTO(out, rc);
+       rc = mdt_check_resent_lock(info, obj, lhc);
+       if (rc < 0)
+               RETURN(rc);
+
+       if (rc > 0) {
+               /* not resent */
+               mdt_lock_handle_init(lhc);
+               mdt_lock_reg_init(lhc, LCK_EX);
+               rc = mdt_reint_object_lock(info, obj, lhc, MDS_INODELOCK_LAYOUT,
+                                          false);
+               if (rc)
+                       RETURN(rc);
+       }
 
        mutex_lock(&obj->mot_som_mutex);
        rc = mo_layout_change(info->mti_env, mdt_object_child(obj), layout);
        mutex_unlock(&obj->mot_som_mutex);
-       mdt_object_unlock(info, obj, lh, 1);
-out:
+
+       if (rc)
+               mdt_object_unlock(info, obj, lhc, 1);
+
        RETURN(rc);
 }
 
@@ -4066,13 +4078,16 @@ static int mdt_intent_layout(enum ldlm_intent_flags it_opc,
                             struct ldlm_lock **lockp,
                             __u64 flags)
 {
-       struct mdt_lock_handle *lhc = &info->mti_lh[MDT_LH_LAYOUT];
+       struct mdt_lock_handle *lhc = &info->mti_lh[MDT_LH_RMT];
        struct md_layout_change layout = { .mlc_opc = MD_LAYOUT_NOP };
        struct layout_intent *intent;
+       struct ldlm_reply *ldlm_rep;
        struct lu_fid *fid = &info->mti_tmp_fid2;
        struct mdt_object *obj = NULL;
        int layout_size = 0;
+       struct lu_buf *buf = &layout.mlc_buf;
        int rc = 0;
+
        ENTRY;
 
        fid_extract_from_res_name(fid, &(*lockp)->l_resource->lr_name);
@@ -4100,24 +4115,16 @@ static int mdt_intent_layout(enum ldlm_intent_flags it_opc,
        case LAYOUT_INTENT_RESTORE:
                CERROR("%s: Unsupported layout intent opc %d\n",
                       mdt_obd_name(info->mti_mdt), intent->li_opc);
-               rc = -ENOTSUPP;
-               break;
+               RETURN(-ENOTSUPP);
        default:
                CERROR("%s: Unknown layout intent opc %d\n",
                       mdt_obd_name(info->mti_mdt), intent->li_opc);
-               rc = -EINVAL;
-               break;
+               RETURN(-EINVAL);
        }
-       if (rc < 0)
-               RETURN(rc);
-
-       /* Get lock from request for possible resent case. */
-       mdt_intent_fixup_resent(info, *lockp, lhc, flags);
 
        obj = mdt_object_find(info->mti_env, info->mti_mdt, fid);
        if (IS_ERR(obj))
-               GOTO(out, rc = PTR_ERR(obj));
-
+               RETURN(PTR_ERR(obj));
 
        if (mdt_object_exists(obj) && !mdt_object_remote(obj)) {
                /* if layout is going to be changed don't use the current EA
@@ -4129,7 +4136,7 @@ static int mdt_intent_layout(enum ldlm_intent_flags it_opc,
                } else {
                        layout_size = mdt_attr_get_eabuf_size(info, obj);
                        if (layout_size < 0)
-                               GOTO(out_obj, rc = layout_size);
+                               GOTO(out, rc = layout_size);
 
                        if (layout_size > info->mti_mdt->mdt_max_mdsize)
                                info->mti_mdt->mdt_max_mdsize = layout_size;
@@ -4142,72 +4149,68 @@ static int mdt_intent_layout(enum ldlm_intent_flags it_opc,
         * set reply buffer size, so that ldlm_handle_enqueue0()->
         * ldlm_lvbo_fill() will fill the reply buffer with lovea.
         */
-       (*lockp)->l_lvb_type = LVB_T_LAYOUT;
        req_capsule_set_size(info->mti_pill, &RMF_DLM_LVB, RCL_SERVER,
                             layout_size);
        rc = req_capsule_server_pack(info->mti_pill);
        if (rc)
-               GOTO(out_obj, rc);
+               GOTO(out, rc);
 
+       ldlm_rep = req_capsule_server_get(info->mti_pill, &RMF_DLM_REP);
+       if (!ldlm_rep)
+               GOTO(out, rc = -EPROTO);
 
-       if (layout.mlc_opc != MD_LAYOUT_NOP) {
-               struct lu_buf *buf = &layout.mlc_buf;
+       mdt_set_disposition(info, ldlm_rep, DISP_IT_EXECD);
 
-               /**
-                * mdt_layout_change is a reint operation, when the request
-                * is resent, layout write shouldn't reprocess it again.
-                */
-               rc = mdt_check_resent(info, mdt_reconstruct_generic, lhc);
-               if (rc)
-                       GOTO(out_obj, rc = rc < 0 ? rc : 0);
+       /* take lock in ldlm_lock_enqueue() for LAYOUT_INTENT_ACCESS */
+       if (layout.mlc_opc == MD_LAYOUT_NOP)
+               GOTO(out, rc = 0);
 
-               /**
-                * There is another resent case: the client's job has been
-                * done by another client, referring lod_declare_layout_change
-                * -EALREADY case, and it became a operation w/o transaction,
-                * so we should not do the layout change, otherwise
-                * mdt_layout_change() will try to cancel the granted server
-                * CR lock whose remote counterpart is still in hold on the
-                * client, and a deadlock ensues.
-                */
-               rc = mdt_check_resent_lock(info, obj, lhc);
-               if (rc <= 0)
-                       GOTO(out_obj, rc);
-
-               buf->lb_buf = NULL;
-               buf->lb_len = 0;
-               if (unlikely(req_is_replay(mdt_info_req(info)))) {
-                       buf->lb_buf = req_capsule_client_get(info->mti_pill,
-                                       &RMF_EADATA);
-                       buf->lb_len = req_capsule_get_size(info->mti_pill,
-                                       &RMF_EADATA, RCL_CLIENT);
-                       /*
-                        * If it's a replay of layout write intent RPC, the
-                        * client has saved the extended lovea when
-                        * it get reply then.
-                        */
-                       if (buf->lb_len > 0)
-                               mdt_fix_lov_magic(info, buf->lb_buf);
-               }
+       rc = mdt_check_resent(info, mdt_reconstruct_generic, lhc);
+       if (rc < 0)
+               GOTO(out, rc);
+       if (rc == 1) {
+               DEBUG_REQ(D_INODE, mdt_info_req(info), "resent opt.");
+               rc = lustre_msg_get_status(mdt_info_req(info)->rq_repmsg);
+               GOTO(out, rc);
+       }
+
+       buf->lb_buf = NULL;
+       buf->lb_len = 0;
+       if (unlikely(req_is_replay(mdt_info_req(info)))) {
+               buf->lb_buf = req_capsule_client_get(info->mti_pill,
+                                                    &RMF_EADATA);
+               buf->lb_len = req_capsule_get_size(info->mti_pill,
+                                                    &RMF_EADATA, RCL_CLIENT);
                /*
-                * Instantiate some layout components, if @buf contains
-                * lovea, then it's a replay of the layout intent write
-                * RPC.
+                * If it's a replay of layout write intent RPC, the client has
+                * saved the extended lovea when it get reply then.
                 */
-               rc = mdt_layout_change(info, obj, &layout);
-               if (rc)
-                       GOTO(out_obj, rc);
+               if (buf->lb_len > 0)
+                       mdt_fix_lov_magic(info, buf->lb_buf);
        }
-out_obj:
-       mdt_object_put(info->mti_env, obj);
 
-       if (rc == 0 && lustre_handle_is_used(&lhc->mlh_reg_lh))
+       /* Get lock from request for possible resent case. */
+       mdt_intent_fixup_resent(info, *lockp, lhc, flags);
+       (*lockp)->l_lvb_type = LVB_T_LAYOUT;
+
+       /*
+        * Instantiate some layout components, if @buf contains lovea, then it's
+        * a replay of the layout intent write RPC.
+        */
+       rc = mdt_layout_change(info, obj, lhc, &layout);
+       ldlm_rep->lock_policy_res2 = clear_serious(rc);
+
+       if (lustre_handle_is_used(&lhc->mlh_reg_lh)) {
                rc = mdt_intent_lock_replace(info, lockp, lhc, flags, rc);
+               if (rc == ELDLM_LOCK_REPLACED &&
+                   (*lockp)->l_granted_mode == LCK_EX)
+                       ldlm_lock_mode_downgrade(*lockp, LCK_CR);
+       }
 
+       EXIT;
 out:
-       lhc->mlh_reg_lh.cookie = 0;
-
-       RETURN(rc);
+       mdt_object_put(info->mti_env, obj);
+       return rc;
 }
 
 static int mdt_intent_open(enum ldlm_intent_flags it_opc,
index 7fe6b87..be7b459 100644 (file)
@@ -901,6 +901,7 @@ int mdt_handle_last_unlink(struct mdt_thread_info *, struct mdt_object *,
                           struct md_attr *);
 void mdt_reconstruct_open(struct mdt_thread_info *, struct mdt_lock_handle *);
 int mdt_layout_change(struct mdt_thread_info *info, struct mdt_object *obj,
+                     struct mdt_lock_handle *lhc,
                      struct md_layout_change *spec);
 int mdt_device_sync(const struct lu_env *env, struct mdt_device *mdt);
 
index 624d4b1..c7cfc1a 100644 (file)
@@ -2197,13 +2197,15 @@ out_lease:
 static int mdt_close_resync_done(struct mdt_thread_info *info,
                                 struct mdt_object *o, struct md_attr *ma)
 {
-       struct close_data       *data;
-       struct ldlm_lock        *lease;
-       struct md_layout_change  layout = { 0 };
-       __u32                   *resync_ids = NULL;
-       size_t                   resync_count = 0;
-       bool                     lease_broken;
-       int                      rc;
+       struct mdt_lock_handle *lhc = &info->mti_lh[MDT_LH_LOCAL];
+       struct close_data *data;
+       struct ldlm_lock *lease;
+       struct md_layout_change layout = { 0 };
+       __u32 *resync_ids = NULL;
+       size_t resync_count = 0;
+       bool lease_broken;
+       int rc;
+
        ENTRY;
 
        if (exp_connect_flags(info->mti_exp) & OBD_CONNECT_RDONLY)
@@ -2276,10 +2278,12 @@ static int mdt_close_resync_done(struct mdt_thread_info *info,
                layout.mlc_som.lsa_size = ma->ma_attr.la_size;
                layout.mlc_som.lsa_blocks = ma->ma_attr.la_blocks;
        }
-       rc = mdt_layout_change(info, o, &layout);
+       rc = mdt_layout_change(info, o, lhc, &layout);
        if (rc)
                GOTO(out_unlock, rc);
 
+       mdt_object_unlock(info, o, lhc, 0);
+
        EXIT;
 
 out_unlock:
index 6d878d3..cdb618e 100644 (file)
@@ -340,7 +340,8 @@ static mdt_reconstructor reconstructors[REINT_MAX] = {
         [REINT_OPEN]     = mdt_reconstruct_open,
        [REINT_SETXATTR] = mdt_reconstruct_generic,
        [REINT_RMENTRY]  = mdt_reconstruct_generic,
-       [REINT_MIGRATE] = mdt_reconstruct_generic
+       [REINT_MIGRATE]  = mdt_reconstruct_generic,
+       [REINT_RESYNC]   = mdt_reconstruct_generic
 };
 
 void mdt_reconstruct(struct mdt_thread_info *mti,
index 0fa2b6b..73c710e 100644 (file)
@@ -2703,14 +2703,15 @@ static int mdt_reint_resync(struct mdt_thread_info *info,
                            struct mdt_lock_handle *lhc)
 {
        struct mdt_reint_record *rr = &info->mti_rr;
-       struct ptlrpc_request   *req = mdt_info_req(info);
-       struct md_attr          *ma = &info->mti_attr;
-       struct mdt_object       *mo;
-       struct ldlm_lock        *lease;
-       struct mdt_body         *repbody;
-       struct md_layout_change  layout = { .mlc_mirror_id = rr->rr_mirror_id };
-       bool                     lease_broken;
-       int                      rc, rc2;
+       struct ptlrpc_request *req = mdt_info_req(info);
+       struct md_attr *ma = &info->mti_attr;
+       struct mdt_object *mo;
+       struct ldlm_lock *lease;
+       struct mdt_body *repbody;
+       struct md_layout_change layout = { .mlc_mirror_id = rr->rr_mirror_id };
+       bool lease_broken;
+       int rc, rc2;
+
        ENTRY;
 
        DEBUG_REQ(D_INODE, req, DFID", FLR file resync", PFID(rr->rr_fid1));
@@ -2750,10 +2751,13 @@ static int mdt_reint_resync(struct mdt_thread_info *info,
 
        /* the file has yet opened by anyone else after we took the lease. */
        layout.mlc_opc = MD_LAYOUT_RESYNC;
-       rc = mdt_layout_change(info, mo, &layout);
+       lhc = &info->mti_lh[MDT_LH_LOCAL];
+       rc = mdt_layout_change(info, mo, lhc, &layout);
        if (rc)
                GOTO(out_unlock, rc);
 
+       mdt_object_unlock(info, mo, lhc, 0);
+
        ma->ma_need = MA_INODE;
        ma->ma_valid = 0;
        rc = mdt_attr_get_complex(info, mo, ma);