Whamcloud - gitweb
LU-6655 ptlrpc: skip delayed replay requests
[fs/lustre-release.git] / lustre / ldlm / ldlm_lib.c
index c4865f9..5c122fb 100644 (file)
@@ -23,7 +23,7 @@
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2010, 2016, Intel Corporation.
+ * Copyright (c) 2010, 2017, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -391,6 +391,9 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
        atomic_long_set(&cli->cl_unstable_count, 0);
        INIT_LIST_HEAD(&cli->cl_shrink_list);
 
+       INIT_LIST_HEAD(&cli->cl_flight_waiters);
+       cli->cl_rpcs_in_flight = 0;
+
        init_waitqueue_head(&cli->cl_destroy_waitq);
        atomic_set(&cli->cl_destroy_in_flight, 0);
 #ifdef ENABLE_CHECKSUM
@@ -428,7 +431,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
                        cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_MAX;
                else
                        cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT;
-        }
+       }
 
        spin_lock_init(&cli->cl_mod_rpcs_lock);
        spin_lock_init(&cli->cl_mod_rpcs_hist.oh_lock);
@@ -1076,7 +1079,8 @@ int target_handle_connect(struct ptlrpc_request *req)
                 *
                 * Via check OBD_CONNECT_FID, we can distinguish whether
                 * the OBD_CONNECT_MDS_MDS/OBD_CONNECT_MNE_SWAB is from
-                * MGC or MDT. */
+                * MGC or MDT, since MGC does not use OBD_CONNECT_FID.
+                */
                if (!lw_client &&
                    (data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) &&
                    (data->ocd_connect_flags & OBD_CONNECT_FID) &&
@@ -1573,12 +1577,13 @@ static void target_finish_recovery(struct lu_target *lut)
        obd->obd_recovery_end = ktime_get_real_seconds();
 
        /* When recovery finished, cleanup orphans on MDS and OST. */
-        if (OBT(obd) && OBP(obd, postrecov)) {
-                int rc = OBP(obd, postrecov)(obd);
-                if (rc < 0)
-                        LCONSOLE_WARN("%s: Post recovery failed, rc %d\n",
-                                      obd->obd_name, rc);
-        }
+       if (obd->obd_type && OBP(obd, postrecov)) {
+               int rc = OBP(obd, postrecov)(obd);
+
+               if (rc < 0)
+                       LCONSOLE_WARN("%s: Post recovery failed, rc %d\n",
+                                     obd->obd_name, rc);
+       }
         EXIT;
 }
 
@@ -1962,7 +1967,18 @@ static int target_recovery_overseer(struct lu_target *lut,
 {
        struct obd_device       *obd = lut->lut_obd;
        struct target_distribute_txn_data *tdtd;
+       time64_t last = 0;
+       time64_t now;
 repeat:
+       if (obd->obd_recovering && obd->obd_recovery_start == 0) {
+               now = ktime_get_seconds();
+               if (now - last > 600) {
+                       LCONSOLE_INFO("%s: in recovery but waiting for "
+                                     "the first client to connect\n",
+                                     obd->obd_name);
+                       last = now;
+               }
+       }
        if (obd->obd_recovery_start != 0 && ktime_get_real_seconds() >=
              (obd->obd_recovery_start + obd->obd_recovery_time_hard)) {
                __u64 next_update_transno = 0;
@@ -2678,6 +2694,17 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
         target_process_req_flags(obd, req);
 
         if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LOCK_REPLAY_DONE) {
+               if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_TGT_RECOVERY_REQ_RACE))) {
+                       if (cfs_fail_val == 1) {
+                               cfs_race_state = 1;
+                               cfs_fail_val = 0;
+                               wake_up(&cfs_race_waitq);
+
+                               set_current_state(TASK_INTERRUPTIBLE);
+                               schedule_timeout(cfs_time_seconds(1));
+                       }
+               }
+
                 /* client declares he's ready to complete recovery
                  * so, we put the request on th final queue */
                target_request_copy_get(req);