struct completion *lrd_started;
};
+static bool lod_recovery_abort(struct obd_device *top) /* true if @top is stopping or aborting recovery */
+{
+ return (top->obd_stopping || top->obd_abort_recovery || /* device shutdown, or obd_abort_recovery set */
+ top->obd_abort_recov_mdt); /* obd_abort_recov_mdt is a separate flag; either abort flag counts */
+}
/**
* process update recovery record
PFID(&llh->lgh_id.lgl_oi.oi_fid), rec->lrh_index);
lut = lod2lu_dev(lrd->lrd_lod)->ld_site->ls_tgt;
- if (lut->lut_obd->obd_stopping ||
- lut->lut_obd->obd_abort_recovery)
+ if (lod_recovery_abort(lut->lut_obd))
return -ESHUTDOWN;
return insert_update_records_to_replay_list(lut->lut_tdtd,
struct lu_env *env = &lrd->lrd_env;
struct lu_target *lut;
struct lu_tgt_desc *mdt = NULL;
+ struct lu_device *top_device;
time64_t start;
int retries = 0;
int rc;
} else {
rc = lod_sub_prep_llog(env, lod, dt, lrd->lrd_idx);
}
+
if (!rc && !lod->lod_child->dd_rdonly) {
/* Process the recovery record */
ctxt = llog_get_context(dt->dd_lu_dev.ld_obd,
lod_process_recovery_updates, lrd, 0, 0);
}
- if (rc < 0) {
- struct lu_device *top_device;
-
- top_device = lod->lod_dt_dev.dd_lu_dev.ld_site->ls_top_dev;
- /*
- * Because the remote target might failover at the same time,
- * let's retry here
- */
- if ((rc == -ETIMEDOUT || rc == -EAGAIN || rc == -EIO) &&
- dt != lod->lod_child &&
- !top_device->ld_obd->obd_abort_recovery &&
- !top_device->ld_obd->obd_stopping) {
+ top_device = lod->lod_dt_dev.dd_lu_dev.ld_site->ls_top_dev;
+ if (rc < 0 && dt != lod->lod_child &&
+ !lod_recovery_abort(top_device->ld_obd)) {
+ if (rc == -EBADR) {
+ /* remote update llog is shorter than expected from
+ * local header. Cached copy could be de-synced during
+ * recovery, trust remote llog data
+ */
+ CDEBUG(D_HA, "%s update log data de-sync\n",
+ dt->dd_lu_dev.ld_obd->obd_name);
+ rc = 0;
+ } else if (rc == -ETIMEDOUT || rc == -EAGAIN || rc == -EIO) {
+ /*
+ * the remote target might failover at the same time,
+ * let's retry here
+ */
if (ctxt) {
if (ctxt->loc_handle)
- llog_cat_close(env,
- ctxt->loc_handle);
+ llog_cat_close(env, ctxt->loc_handle);
llog_ctxt_put(ctxt);
+ ctxt = NULL;
}
retries++;
CDEBUG(D_HA, "%s get update log failed %d, retry\n",
dt->dd_lu_dev.ld_obd->obd_name, rc);
goto again;
}
+ }
+ llog_ctxt_put(ctxt);
+ if (rc < 0) {
CERROR("%s get update log failed: rc = %d\n",
dt->dd_lu_dev.ld_obd->obd_name, rc);
- llog_ctxt_put(ctxt);
-
spin_lock(&top_device->ld_obd->obd_dev_lock);
- if (!top_device->ld_obd->obd_abort_recovery &&
- !top_device->ld_obd->obd_stopping)
- top_device->ld_obd->obd_abort_recovery = 1;
+ if (!lod_recovery_abort(top_device->ld_obd))
+ /* abort just MDT-MDT recovery */
+ top_device->ld_obd->obd_abort_recov_mdt = 1;
spin_unlock(&top_device->ld_obd->obd_dev_lock);
-
GOTO(out, rc);
}
- llog_ctxt_put(ctxt);
CDEBUG(D_HA, "%s retrieved update log, duration %lld, retries %d\n",
dt->dd_lu_dev.ld_obd->obd_name, ktime_get_real_seconds() - start,
case OBD_IOC_ABORT_RECOVERY: {
struct obd_ioctl_data *data = karg;
- CERROR("%s: Aborting recovery for device\n", mdt_obd_name(mdt));
if (data->ioc_type & OBD_FLG_ABORT_RECOV_MDT) {
+ CERROR("%s: Aborting MDT recovery\n",
+ mdt_obd_name(mdt));
obd->obd_abort_recov_mdt = 1;
wake_up(&obd->obd_next_transno_waitq);
} else { /* if (data->ioc_type & OBD_FLG_ABORT_RECOV_OST) */
/* lctl didn't set OBD_FLG_ABORT_RECOV_OST < 2.13.57 */
+ CERROR("%s: Aborting client recovery\n",
+ mdt_obd_name(mdt));
obd->obd_abort_recovery = 1;
target_stop_recovery_thread(obd);
}
* Parse non-ldiskfs options here. Rather than modifying
* ldiskfs, we just zero these out here
*/
- if (strncmp(s1, "abort_recov", 11) == 0) {
- lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
- clear++;
- } else if (strncmp(s1, "abort_recov_mdt", 15) == 0) {
+ if (!strncmp(s1, "abort_recov_mdt", 15) ||
+ !strncmp(s1, "abort_recovery_mdt", 18)) {
lmd->lmd_flags |= LMD_FLG_ABORT_RECOV_MDT;
clear++;
+ } else if (strncmp(s1, "abort_recov", 11) == 0) {
+ lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
+ clear++;
} else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
lmd->lmd_recovery_time_soft =
max_t(int, simple_strtoul(s1 + 19, NULL, 10),