LU-15938 lod: prevent endless retry in recovery thread

author Mikhail Pershin <mpershin@whamcloud.com>

Wed, 22 Jun 2022 10:27:48 +0000 (13:27 +0300)

committer Andreas Dilger <adilger@whamcloud.com>

Fri, 15 Jul 2022 04:25:21 +0000 (04:25 +0000)
author Mikhail Pershin <mpershin@whamcloud.com>
Wed, 22 Jun 2022 10:27:48 +0000 (13:27 +0300)
committer Andreas Dilger <adilger@whamcloud.com>
Fri, 15 Jul 2022 04:25:21 +0000 (04:25 +0000)
diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c

index 86cb84e..71e3f3c 100644 (file)
--- a/lustre/lod/lod_dev.c
+++ b/lustre/lod/lod_dev.c
@@ -276,6 +276,11 @@ struct lod_recovery_data {
         struct completion       *lrd_started;
  };
  
+static bool lod_recovery_abort(struct obd_device *top)
+{
+       return (top->obd_stopping || top->obd_abort_recovery ||
+               top->obd_abort_recov_mdt);
+}
  
  /**
   * process update recovery record
@@ -332,8 +337,7 @@ static int lod_process_recovery_updates(const struct lu_env *env,
                PFID(&llh->lgh_id.lgl_oi.oi_fid), rec->lrh_index);
         lut = lod2lu_dev(lrd->lrd_lod)->ld_site->ls_tgt;
  
-       if (lut->lut_obd->obd_stopping ||
-           lut->lut_obd->obd_abort_recovery)
+       if (lod_recovery_abort(lut->lut_obd))
                 return -ESHUTDOWN;
  
         return insert_update_records_to_replay_list(lut->lut_tdtd,
@@ -361,6 +365,7 @@ static int lod_sub_recovery_thread(void *arg)
         struct lu_env *env = &lrd->lrd_env;
         struct lu_target *lut;
         struct lu_tgt_desc *mdt = NULL;
+       struct lu_device *top_device;
         time64_t start;
         int retries = 0;
         int rc;
@@ -386,6 +391,7 @@ again:
         } else {
                 rc = lod_sub_prep_llog(env, lod, dt, lrd->lrd_idx);
         }
+
         if (!rc && !lod->lod_child->dd_rdonly) {
                 /* Process the recovery record */
                 ctxt = llog_get_context(dt->dd_lu_dev.ld_obd,
@@ -397,43 +403,46 @@ again:
                                       lod_process_recovery_updates, lrd, 0, 0);
         }
  
-       if (rc < 0) {
-               struct lu_device *top_device;
-
-               top_device = lod->lod_dt_dev.dd_lu_dev.ld_site->ls_top_dev;
-               /*
-                * Because the remote target might failover at the same time,
-                * let's retry here
-                */
-               if ((rc == -ETIMEDOUT || rc == -EAGAIN || rc == -EIO) &&
-                    dt != lod->lod_child &&
-                   !top_device->ld_obd->obd_abort_recovery &&
-                   !top_device->ld_obd->obd_stopping) {
+       top_device = lod->lod_dt_dev.dd_lu_dev.ld_site->ls_top_dev;
+       if (rc < 0 && dt != lod->lod_child &&
+           !lod_recovery_abort(top_device->ld_obd)) {
+               if (rc == -EBADR) {
+                       /* remote update llog is shorter than expected from
+                        * local header. Cached copy could be de-synced during
+                        * recovery, trust remote llog data
+                        */
+                       CDEBUG(D_HA, "%s update log data de-sync\n",
+                              dt->dd_lu_dev.ld_obd->obd_name);
+                       rc = 0;
+               } else if (rc == -ETIMEDOUT || rc == -EAGAIN || rc == -EIO) {
+                       /*
+                        * the remote target might failover at the same time,
+                        * let's retry here
+                        */
                         if (ctxt) {
                                 if (ctxt->loc_handle)
-                                       llog_cat_close(env,
-                                                      ctxt->loc_handle);
+                                       llog_cat_close(env, ctxt->loc_handle);
                                 llog_ctxt_put(ctxt);
+                               ctxt = NULL;
                         }
                         retries++;
                         CDEBUG(D_HA, "%s get update log failed %d, retry\n",
                                dt->dd_lu_dev.ld_obd->obd_name, rc);
                         goto again;
                 }
+       }
  
+       llog_ctxt_put(ctxt);
+       if (rc < 0) {
                 CERROR("%s get update log failed: rc = %d\n",
                        dt->dd_lu_dev.ld_obd->obd_name, rc);
-               llog_ctxt_put(ctxt);
-
                 spin_lock(&top_device->ld_obd->obd_dev_lock);
-               if (!top_device->ld_obd->obd_abort_recovery &&
-                   !top_device->ld_obd->obd_stopping)
-                       top_device->ld_obd->obd_abort_recovery = 1;
+               if (!lod_recovery_abort(top_device->ld_obd))
+                       /* abort just MDT-MDT recovery */
+                       top_device->ld_obd->obd_abort_recov_mdt = 1;
                 spin_unlock(&top_device->ld_obd->obd_dev_lock);
-
                 GOTO(out, rc);
         }
-       llog_ctxt_put(ctxt);
  
         CDEBUG(D_HA, "%s retrieved update log, duration %lld, retries %d\n",
                dt->dd_lu_dev.ld_obd->obd_name, ktime_get_real_seconds() - start,
diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c

index b82e1a9..1b0fbf3 100644 (file)
--- a/lustre/mdt/mdt_handler.c
+++ b/lustre/mdt/mdt_handler.c
@@ -7430,12 +7430,15 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
         case OBD_IOC_ABORT_RECOVERY: {
                 struct obd_ioctl_data *data = karg;
  
-               CERROR("%s: Aborting recovery for device\n", mdt_obd_name(mdt));
                 if (data->ioc_type & OBD_FLG_ABORT_RECOV_MDT) {
+                       CERROR("%s: Aborting MDT recovery\n",
+                              mdt_obd_name(mdt));
                         obd->obd_abort_recov_mdt = 1;
                         wake_up(&obd->obd_next_transno_waitq);
                 } else { /* if (data->ioc_type & OBD_FLG_ABORT_RECOV_OST) */
                         /* lctl didn't set OBD_FLG_ABORT_RECOV_OST < 2.13.57 */
+                       CERROR("%s: Aborting client recovery\n",
+                              mdt_obd_name(mdt));
                         obd->obd_abort_recovery = 1;
                         target_stop_recovery_thread(obd);
                 }
diff --git a/lustre/obdclass/llog_osd.c b/lustre/obdclass/llog_osd.c

index 6ebb15c..dd379e4 100644 (file)
--- a/lustre/obdclass/llog_osd.c
+++ b/lustre/obdclass/llog_osd.c
@@ -1086,7 +1086,12 @@ retry:
                 *cur_offset = last_offset;
                 *cur_idx = last_idx;
         }
-       GOTO(out, rc = -EIO);
+       /* being here means we reach end of llog but didn't find needed idx
+        * normally could happen while processing remote llog, return -EBADR
+        * to indicate access beyond end of file like dt_read() does and to
+        * distunguish this situation from real IO or network issues.
+        */
+       GOTO(out, rc = -EBADR);
  out:
         dt_read_unlock(env, o);
         return rc;
diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c

index 3ec7d9c..dab0b17 100644 (file)
--- a/lustre/obdclass/obd_mount.c
+++ b/lustre/obdclass/obd_mount.c
@@ -1347,12 +1347,13 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
                  * Parse non-ldiskfs options here. Rather than modifying
                  * ldiskfs, we just zero these out here
                  */
-               if (strncmp(s1, "abort_recov", 11) == 0) {
-                       lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
-                       clear++;
-               } else if (strncmp(s1, "abort_recov_mdt", 15) == 0) {
+               if (!strncmp(s1, "abort_recov_mdt", 15) ||
+                   !strncmp(s1, "abort_recovery_mdt", 18)) {
                         lmd->lmd_flags |= LMD_FLG_ABORT_RECOV_MDT;
                         clear++;
+               } else if (strncmp(s1, "abort_recov", 11) == 0) {
+                       lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
+                       clear++;
                 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
                         lmd->lmd_recovery_time_soft =
                                 max_t(int, simple_strtoul(s1 + 19, NULL, 10),
author	Mikhail Pershin <mpershin@whamcloud.com>
	Wed, 22 Jun 2022 10:27:48 +0000 (13:27 +0300)
committer	Andreas Dilger <adilger@whamcloud.com>
	Fri, 15 Jul 2022 04:25:21 +0000 (04:25 +0000)
lustre/lod/lod_dev.c		patch \| blob \| history
lustre/mdt/mdt_handler.c		patch \| blob \| history
lustre/obdclass/llog_osd.c		patch \| blob \| history
lustre/obdclass/obd_mount.c		patch \| blob \| history