Whamcloud - gitweb
LU-2397 recovery: check queue_len > 0 in check_for_next_transno
authorMikhail Pershin <mike.pershin@intel.com>
Fri, 11 Jan 2013 07:21:10 +0000 (11:21 +0400)
committerOleg Drokin <green@whamcloud.com>
Fri, 18 Jan 2013 16:41:47 +0000 (11:41 -0500)
check_for_next_transno() relies on obd_req_replay_clients being unable
to change during the function call, but that assumption is not true.

The patch makes sure that the case of a zero queue_len is excluded, and
moves the check for obd_req_replay_clients == 0 after the comparison with
queue_len.

Signed-off-by: Mikhail Pershin <mike.pershin@intel.com>
Change-Id: I0b81e0f8e6a4f2cbfd51b6a049ee273cec4732a2
Reviewed-on: http://review.whamcloud.com/4998
Tested-by: Hudson
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ldlm/ldlm_lib.c

index 59fb9f6..540c6bb 100644 (file)
@@ -1735,13 +1735,11 @@ static int check_for_next_transno(struct obd_device *obd)
         } else if (obd->obd_recovery_expired) {
                 CDEBUG(D_HA, "waking for expired recovery\n");
                 wake_up = 1;
         } else if (obd->obd_recovery_expired) {
                 CDEBUG(D_HA, "waking for expired recovery\n");
                 wake_up = 1;
-        } else if (cfs_atomic_read(&obd->obd_req_replay_clients) == 0) {
-                CDEBUG(D_HA, "waking for completed recovery\n");
-                wake_up = 1;
         } else if (req_transno == next_transno) {
                 CDEBUG(D_HA, "waking for next ("LPD64")\n", next_transno);
                 wake_up = 1;
         } else if (req_transno == next_transno) {
                 CDEBUG(D_HA, "waking for next ("LPD64")\n", next_transno);
                 wake_up = 1;
-        } else if (queue_len == cfs_atomic_read(&obd->obd_req_replay_clients)) {
+       } else if (queue_len > 0 &&
+                  queue_len == cfs_atomic_read(&obd->obd_req_replay_clients)) {
                 int d_lvl = D_HA;
                 /** handle gaps occured due to lost reply or VBR */
                 LASSERTF(req_transno >= next_transno,
                 int d_lvl = D_HA;
                 /** handle gaps occured due to lost reply or VBR */
                 LASSERTF(req_transno >= next_transno,
@@ -1759,6 +1757,9 @@ static int check_for_next_transno(struct obd_device *obd)
                        req_transno, obd->obd_last_committed);
                 obd->obd_next_recovery_transno = req_transno;
                 wake_up = 1;
                        req_transno, obd->obd_last_committed);
                 obd->obd_next_recovery_transno = req_transno;
                 wake_up = 1;
+       } else if (cfs_atomic_read(&obd->obd_req_replay_clients) == 0) {
+               CDEBUG(D_HA, "waking for completed recovery\n");
+               wake_up = 1;
         } else if (OBD_FAIL_CHECK(OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS)) {
                 CDEBUG(D_HA, "accepting transno gaps is explicitly allowed"
                        " by fail_lock, waking up ("LPD64")\n", next_transno);
         } else if (OBD_FAIL_CHECK(OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS)) {
                 CDEBUG(D_HA, "accepting transno gaps is explicitly allowed"
                        " by fail_lock, waking up ("LPD64")\n", next_transno);