Whamcloud - gitweb
LU-14930 mdt: abort_recov_mdt shouldn't abort client recovery 10/44610/2
authorMikhail Pershin <mpershin@whamcloud.com>
Wed, 11 Aug 2021 14:30:48 +0000 (17:30 +0300)
committerOleg Drokin <green@whamcloud.com>
Wed, 25 Aug 2021 20:04:11 +0000 (20:04 +0000)
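When abort_recov_mdt is used to abort MDT-MDT recovery, the
abort_recovery flag is also set inside the
target_stop_recovery_thread() call. That aborts not only the MDT-MDT
recovery but client recovery as well.

For abort_recov_mdt, only set the obd_abort_recov_mdt flag and wake up
obd_next_transno_waitq instead of calling
target_stop_recovery_thread(), and clear obd_abort_recov_mdt wherever
obd_abort_recovery is cleared.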

Fixes: dd9e79b64d ("LU-12546 mdt: abort recovery between MDTs")
Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Change-Id: Ibda05e91a2da90156e2b6c9fdcb2169cdbd50fe4
Reviewed-on: https://review.whamcloud.com/44610
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ldlm/ldlm_lib.c
lustre/mdt/mdt_handler.c
lustre/obdclass/lprocfs_status_server.c

index 7ce007b..64214e2 100644 (file)
@@ -1843,6 +1843,7 @@ void target_cleanup_recovery(struct obd_device *obd)
                return;
        }
        obd->obd_recovering = obd->obd_abort_recovery = 0;
+       obd->obd_abort_recov_mdt = 0;
        spin_unlock(&obd->obd_dev_lock);
 
        spin_lock(&obd->obd_recovery_task_lock);
@@ -2792,6 +2793,7 @@ static int target_recovery_thread(void *arg)
         */
        spin_lock(&obd->obd_dev_lock);
        obd->obd_recovering = obd->obd_abort_recovery = 0;
+       obd->obd_abort_recov_mdt = 0;
        spin_unlock(&obd->obd_dev_lock);
        spin_lock(&obd->obd_recovery_task_lock);
        target_cancel_recovery_timer(obd);
index e2fda89..47b97cf 100644 (file)
@@ -7184,13 +7184,14 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                struct obd_ioctl_data *data = karg;
 
                CERROR("%s: Aborting recovery for device\n", mdt_obd_name(mdt));
-               if (data->ioc_type & OBD_FLG_ABORT_RECOV_MDT)
+               if (data->ioc_type & OBD_FLG_ABORT_RECOV_MDT) {
                        obd->obd_abort_recov_mdt = 1;
-               else /* if (data->ioc_type & OBD_FLG_ABORT_RECOV_OST) */
+                       wake_up(&obd->obd_next_transno_waitq);
+               } else { /* if (data->ioc_type & OBD_FLG_ABORT_RECOV_OST) */
                        /* lctl didn't set OBD_FLG_ABORT_RECOV_OST < 2.13.57 */
                        obd->obd_abort_recovery = 1;
-
-               target_stop_recovery_thread(obd);
+                       target_stop_recovery_thread(obd);
+               }
                rc = 0;
                break;
        }
index e90cd29..8584b1b 100644 (file)
@@ -702,8 +702,11 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
                goto out;
        }
 
-       /* sampled unlocked, but really... */
-       if (obd->obd_recovering == 0) {
+       /* There is gap between client data read from storage and setting
+        * obd_recovering so check obd_recovery_end as well to make sure
+        * recovery is really finished
+        */
+       if (obd->obd_recovery_end > 0 && !obd->obd_recovering) {
                seq_printf(m, "COMPLETE\n");
                seq_printf(m, "recovery_start: %lld\n",
                           (s64)ktime_get_real_seconds() -