Whamcloud - gitweb
LU-5282 mdc: fix panic at mdc_free_open() 95/17495/13
author: Alexander Boyko <alexander.boyko@seagate.com>
Thu, 3 Dec 2015 06:57:36 +0000 (09:57 +0300)
committer: Oleg Drokin <oleg.drokin@intel.com>
Tue, 5 Jul 2016 23:48:14 +0000 (23:48 +0000)
An assertion was triggered for an open request when rq_replay was
set to 1.
    ASSERTION(mod->mod_open_req->rq_replay == 0)
But this situation is not fatal for the client, and can happen
when mdc_close() fails.
The fix allows such requests to be freed. If mdc_close() fails, the MDS
doesn't receive a close request from the client, and in the worst case
the client will be evicted.

The test recreates the issue where mdc_close() fails and the
client asserts:
   ASSERTION( mod->mod_open_req->rq_replay == 0 ) failed

Signed-off-by: Alexander Boyko <alexander.boyko@seagate.com>
Seagate-bug-id: MRP-3156
Change-Id: I5f98901f633355849fc107149eadc9cf171819af
Reviewed-on: http://review.whamcloud.com/17495
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd_support.h
lustre/mdc/mdc_request.c
lustre/tests/sanity.sh

index c1eedb5..3016487 100644 (file)
@@ -457,6 +457,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDC_GETATTR_ENQUEUE     0x803
 #define OBD_FAIL_MDC_RPCS_SEM           0x804
 #define OBD_FAIL_MDC_LIGHTWEIGHT        0x805
 #define OBD_FAIL_MDC_GETATTR_ENQUEUE     0x803
 #define OBD_FAIL_MDC_RPCS_SEM           0x804
 #define OBD_FAIL_MDC_LIGHTWEIGHT        0x805
+#define OBD_FAIL_MDC_CLOSE              0x806
 
 #define OBD_FAIL_MGS                     0x900
 #define OBD_FAIL_MGS_ALL_REQUEST_NET     0x901
 
 #define OBD_FAIL_MGS                     0x900
 #define OBD_FAIL_MGS_ALL_REQUEST_NET     0x901
index e347423..f5dfcdc 100644 (file)
@@ -699,9 +699,15 @@ static void mdc_free_open(struct md_open_data *mod)
            imp_connect_disp_stripe(mod->mod_open_req->rq_import))
                committed = 1;
 
            imp_connect_disp_stripe(mod->mod_open_req->rq_import))
                committed = 1;
 
-       LASSERT(mod->mod_open_req->rq_replay == 0);
+       /**
+        * No reason to assert here if the open request has
+        * rq_replay == 1. It means that mdc_close failed, and
+        * close request wasn't sent. It is not fatal to client.
+        * The worst thing is eviction if the client gets open lock
+        **/
 
 
-       DEBUG_REQ(D_RPCTRACE, mod->mod_open_req, "free open request\n");
+       DEBUG_REQ(D_RPCTRACE, mod->mod_open_req, "free open request rq_replay"
+                 "= %d\n", mod->mod_open_req->rq_replay);
 
        ptlrpc_request_committed(mod->mod_open_req, committed);
        if (mod->mod_close_req)
 
        ptlrpc_request_committed(mod->mod_open_req, committed);
        if (mod->mod_close_req)
@@ -760,15 +766,43 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
        }
 
        *request = NULL;
        }
 
        *request = NULL;
-       req = ptlrpc_request_alloc(class_exp2cliimp(exp), req_fmt);
-        if (req == NULL)
-                RETURN(-ENOMEM);
+       if (OBD_FAIL_CHECK(OBD_FAIL_MDC_CLOSE))
+               req = NULL;
+       else
+               req = ptlrpc_request_alloc(class_exp2cliimp(exp), req_fmt);
+
+       /* Ensure that this close's handle is fixed up during replay. */
+       if (likely(mod != NULL)) {
+               LASSERTF(mod->mod_open_req != NULL &&
+                        mod->mod_open_req->rq_type != LI_POISON,
+                        "POISONED open %p!\n", mod->mod_open_req);
+
+               mod->mod_close_req = req;
+
+               DEBUG_REQ(D_HA, mod->mod_open_req, "matched open");
+               /* We no longer want to preserve this open for replay even
+                * though the open was committed. b=3632, b=3633 */
+               spin_lock(&mod->mod_open_req->rq_lock);
+               mod->mod_open_req->rq_replay = 0;
+               spin_unlock(&mod->mod_open_req->rq_lock);
+       } else {
+               CDEBUG(D_HA, "couldn't find open req; expecting close error\n");
+       }
+       if (req == NULL) {
+               /**
+                * TODO: repeat close after errors
+                */
+               CWARN("%s: close of FID "DFID" failed, file reference will be "
+                     "dropped when this client unmounts or is evicted\n",
+                     obd->obd_name, PFID(&op_data->op_fid1));
+               GOTO(out, rc = -ENOMEM);
+       }
 
 
-        rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_CLOSE);
-        if (rc) {
-                ptlrpc_request_free(req);
-                RETURN(rc);
-        }
+       rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_CLOSE);
+       if (rc) {
+               ptlrpc_request_free(req);
+               GOTO(out, rc);
+       }
 
         /* To avoid a livelock (bug 7034), we need to send CLOSE RPCs to a
          * portal whose threads are not taking any DLM locks and are therefore
 
         /* To avoid a livelock (bug 7034), we need to send CLOSE RPCs to a
          * portal whose threads are not taking any DLM locks and are therefore
@@ -776,23 +810,6 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
         req->rq_request_portal = MDS_READPAGE_PORTAL;
         ptlrpc_at_set_req_timeout(req);
 
         req->rq_request_portal = MDS_READPAGE_PORTAL;
         ptlrpc_at_set_req_timeout(req);
 
-        /* Ensure that this close's handle is fixed up during replay. */
-        if (likely(mod != NULL)) {
-                LASSERTF(mod->mod_open_req != NULL &&
-                         mod->mod_open_req->rq_type != LI_POISON,
-                         "POISONED open %p!\n", mod->mod_open_req);
-
-                mod->mod_close_req = req;
-
-                DEBUG_REQ(D_HA, mod->mod_open_req, "matched open");
-                /* We no longer want to preserve this open for replay even
-                 * though the open was committed. b=3632, b=3633 */
-               spin_lock(&mod->mod_open_req->rq_lock);
-               mod->mod_open_req->rq_replay = 0;
-               spin_unlock(&mod->mod_open_req->rq_lock);
-        } else {
-                 CDEBUG(D_HA, "couldn't find open req; expecting close error\n");
-        }
 
         mdc_close_pack(req, op_data);
 
 
         mdc_close_pack(req, op_data);
 
@@ -837,6 +854,7 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
                 }
         }
 
                 }
         }
 
+out:
         if (mod) {
                 if (rc != 0)
                         mod->mod_close_req = NULL;
         if (mod) {
                 if (rc != 0)
                         mod->mod_close_req = NULL;
index 66c1ccc..c8b46c8 100644 (file)
@@ -13953,6 +13953,14 @@ test_256() {
 }
 run_test 256 "Check llog delete for empty and not full state"
 
 }
 run_test 256 "Check llog delete for empty and not full state"
 
+test_260() {
+#define OBD_FAIL_MDC_CLOSE               0x806
+       $LCTL set_param fail_loc=0x80000806
+       touch $DIR/$tfile
+
+}
+run_test 260 "Check mdc_close fail"
+
 cleanup_test_300() {
        trap 0
        umask $SAVE_UMASK
 cleanup_test_300() {
        trap 0
        umask $SAVE_UMASK