Whamcloud - gitweb
LU-7836 ptlrpc: remove duplicate final ping req 93/19693/2
authorDi Wang <di.wang@intel.com>
Tue, 19 Apr 2016 13:53:51 +0000 (09:53 -0400)
committerOleg Drokin <oleg.drokin@intel.com>
Wed, 20 Jul 2016 17:42:15 +0000 (17:42 +0000)
Remove the duplicate final ping req if the failover
server gets a new final ping from the same client;
otherwise the final pings will pile up on
the recovery server, especially if recovery is
stuck during DNE failover.

Signed-off-by: Di Wang <di.wang@intel.com>
Change-Id: I41ecc88cede1024d386283e693698789ac0b7aa4
Reviewed-on: http://review.whamcloud.com/19693
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/lustre_export.h
lustre/ldlm/ldlm_lib.c
lustre/tests/replay-single.sh

index 4e7a296..f1321cd 100644 (file)
@@ -270,7 +270,10 @@ struct obd_export {
                                   exp_libclient:1, /* liblustre client? */
                                  /* if to swap nidtbl entries for 2.2 clients.
                                   * Only used by the MGS to fix LU-1644. */
-                                 exp_need_mne_swab:1;
+                                 exp_need_mne_swab:1,
+                                 /* The export already got final replay ping
+                                  * request. */
+                                 exp_replay_done:1;
         /* also protected by exp_lock */
         enum lustre_sec_part      exp_sp_peer;
         struct sptlrpc_flavor     exp_flvr;             /* current */
index aeeee64..292f8ec 100644 (file)
@@ -2689,15 +2689,46 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
                wake_up(&obd->obd_next_transno_waitq);
                spin_lock(&obd->obd_recovery_task_lock);
                if (obd->obd_recovering) {
+                       struct ptlrpc_request *tmp;
+                       struct ptlrpc_request *duplicate = NULL;
+
+                       if (likely(!req->rq_export->exp_replay_done)) {
+                               req->rq_export->exp_replay_done = 1;
+                               list_add_tail(&req->rq_list,
+                                             &obd->obd_final_req_queue);
+                               spin_unlock(&obd->obd_recovery_task_lock);
+                               RETURN(0);
+                       }
+
+                       /* XXX O(n), but only happens if final ping is
+                        * timed out, probably reorganize the list as
+                        * a hash list later */
+                       list_for_each_entry_safe(reqiter, tmp,
+                                                &obd->obd_final_req_queue,
+                                                rq_list) {
+                               if (reqiter->rq_export == req->rq_export) {
+                                       list_del_init(&reqiter->rq_list);
+                                       duplicate = reqiter;
+                                       break;
+                               }
+                       }
+
                        list_add_tail(&req->rq_list,
-                                         &obd->obd_final_req_queue);
+                                     &obd->obd_final_req_queue);
+                       req->rq_export->exp_replay_done = 1;
+                       spin_unlock(&obd->obd_recovery_task_lock);
+
+                       if (duplicate != NULL) {
+                               DEBUG_REQ(D_HA, duplicate,
+                                         "put prev final req\n");
+                               target_request_copy_put(duplicate);
+                       }
+                       RETURN(0);
                } else {
                        spin_unlock(&obd->obd_recovery_task_lock);
                        target_request_copy_put(req);
                        RETURN(obd->obd_stopping ? -ENOTCONN : 1);
                }
-               spin_unlock(&obd->obd_recovery_task_lock);
-               RETURN(0);
        }
        if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REQ_REPLAY_DONE) {
                /* client declares he's ready to replay locks */
index 7d34756..80d8d0d 100755 (executable)
@@ -3265,7 +3265,7 @@ test_90() { # bug 19494
 }
 run_test 90 "lfs find identifies the missing striped file segments"
 
-test_93() {
+test_93a() {
        local server_version=$(lustre_version_code $SINGLEMDS)
                [[ $server_version -ge $(version_code 2.6.90) ]] ||
                [[ $server_version -ge $(version_code 2.5.4) &&
@@ -3287,7 +3287,28 @@ test_93() {
        do_facet ost1 "$LCTL set_param fail_loc=0x715"
        fail ost1
 }
-run_test 93 "replay + reconnect"
+run_test 93a "replay + reconnect"
+
+test_93b() {
+       local server_version=$(lustre_version_code $SINGLEMDS)
+               [[ $server_version -ge $(version_code 2.7.90) ]] ||
+               { skip "Need MDS version 2.7.90+"; return; }
+
+       cancel_lru_locks mdc
+
+       createmany -o $DIR/$tfile 20 ||
+                       error "createmany -o $DIR/$tfile failed"
+
+       #define OBD_FAIL_TGT_REPLAY_RECONNECT     0x715
+       # Emulate an MDT that is still waiting for other clients to finish
+       # recovery: the final ping is queued, but its reply is only sent
+       # once recovery completes. This is done by sleeping before the
+       # final pings are processed.
+       do_facet mds1 "$LCTL set_param fail_val=80"
+       do_facet mds1 "$LCTL set_param fail_loc=0x715"
+       fail mds1
+}
+run_test 93b "replay + reconnect on mds"
 
 striped_dir_check_100() {
        local striped_dir=$DIR/$tdir/striped_dir