Whamcloud - gitweb
LU-9311 pfl: shouldn't reprocess done/no-op resent request 74/26474/5
authorBobi Jam <bobijam.xu@intel.com>
Mon, 10 Apr 2017 17:50:04 +0000 (01:50 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Wed, 26 Apr 2017 03:38:13 +0000 (03:38 +0000)
When the LOVEA buffer is bigger than the request reply buffer, the
client will resend the layout write intent RPC. mdt_layout_change()
should not reprocess the resent request, since the second pass would
try to cancel the CR lock granted by the first pass, while the client
has not yet received that grant because the reply buffer shortage
forced it to resend the RPC.

There is another layout change resend case: the client's job has
already been done by another client (see the -EALREADY case in
lod_declare_layout_change()), so the request became an operation
without a transaction. We should not do the layout change in this case
either; otherwise mdt_layout_change() will try to cancel the granted
server CR lock whose remote counterpart is still held by the client,
and a deadlock ensues.

This patch also adjusts some debug messages and makes dump_lsm() dump
uninstantiated component stripe info.

Signed-off-by: Bobi Jam <bobijam.xu@intel.com>
Change-Id: I9b063ee54d57c233eca3250502a2707997892898
Reviewed-on: https://review.whamcloud.com/26474
Reviewed-by: Niu Yawei <yawei.niu@intel.com>
Tested-by: Jenkins
Reviewed-by: Emoly Liu <emoly.liu@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/md_object.h
lustre/llite/vvp_io.c
lustre/lov/lov_ea.c
lustre/mdt/mdt_handler.c

index 41970de..d64d243 100644 (file)
@@ -451,10 +451,6 @@ static inline int mo_layout_change(const struct lu_env *env,
                                   struct layout_intent *layout,
                                   const struct lu_buf *buf)
 {
-       CDEBUG(D_INFO, "got layout change request from client: "
-              "opc:%u flags:%#x extent[%#llx,%#llx)\n",
-              layout->li_opc, layout->li_flags,
-              layout->li_start, layout->li_end);
        /* need instantiate objects which in the access range */
        LASSERT(m->mo_ops->moo_layout_change);
        return m->mo_ops->moo_layout_change(env, m, layout, buf);
index 50afff6..700b179 100644 (file)
@@ -353,6 +353,9 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
                        end = io->u.ci_setattr.sa_attr.lvb_size;
                }
 
+               CDEBUG(D_VFSTRACE, DFID" type %d [%llx, %llx)\n",
+                      PFID(lu_object_fid(&obj->co_lu)), io->ci_type,
+                      start, end);
                rc = ll_layout_write_intent(inode, start, end);
                io->ci_result = rc;
                if (!rc)
index 04624a3..1315681 100644 (file)
@@ -516,7 +516,7 @@ void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm)
                       lse->lsme_pool_name);
                if (!lsme_inited(lse) ||
                    lse->lsme_pattern & LOV_PATTERN_F_RELEASED)
-                       break;
+                       continue;
                for (j = 0; j < lse->lsme_stripe_count; j++) {
                        CDEBUG(level, "   oinfo:%p: ostid: "DOSTID
                               " ost idx: %d gen: %d\n",
index 4c0a59b..6b31479 100644 (file)
@@ -1252,6 +1252,10 @@ static int mdt_layout_change(struct mdt_thread_info *info,
        int rc;
        ENTRY;
 
+       CDEBUG(D_INFO, "got layout change request from client: "
+              "opc:%u flags:%#x extent[%#llx,%#llx)\n",
+              layout->li_opc, layout->li_flags,
+              layout->li_start, layout->li_end);
        if (layout->li_start >= layout->li_end) {
                CERROR("Recieved an invalid layout change range [%llu, %llu) "
                       "for "DFID"\n", layout->li_start, layout->li_end,
@@ -3575,6 +3579,27 @@ static int mdt_intent_layout(enum mdt_it_code opcode,
        if (layout_change) {
                struct lu_buf *buf = &info->mti_buf;
 
+               /**
+                * mdt_layout_change is a reint operation, when the request
+                * is resent, layout write shouldn't reprocess it again.
+                */
+               rc = mdt_check_resent(info, mdt_reconstruct_generic, lhc);
+               if (rc)
+                       GOTO(out_obj, rc = rc < 0 ? rc : 0);
+
+               /**
+                * There is another resent case: the client's job has been
+                * done by another client, referring lod_declare_layout_change
+                * -EALREADY case, and it became a operation w/o transaction,
+                * so we should not do the layout change, otherwise
+                * mdt_layout_change() will try to cancel the granted server
+                * CR lock whose remote counterpart is still in hold on the
+                * client, and a deadlock ensues.
+                */
+               rc = mdt_check_resent_lock(info, obj, lhc);
+               if (rc <= 0)
+                       GOTO(out_obj, rc);
+
                buf->lb_buf = NULL;
                buf->lb_len = 0;
                if (unlikely(req_is_replay(mdt_info_req(info)))) {