- abort lod_sub_recovery_thread() by obd_abort_recov_mdt in
addition to obd_abort_recovery
- handle 'short llog' situation gracefully, when remote llog
is shorter than local copy header expects, trust remote llog
data and consider llog processing as finished
- on other errors during remote llog read, set obd_abort_recov_mdt
but not obd_abort_recovery in attempt to skip MDT-MDT recovery
only and continue with client recovery while possible
- fix parsing problem with 'abort_recov' and 'abort_recov_mdt' in
lmd_parse() causing no MDT recovery abort but client recovery
abort always. Allow also 'abort_recovery_mdt' mount option name
The original case with endless retry is caused by such de-sync
between local llog structures and remote llog. The local llog
header says there is record with some ID, so recovery thread
is trying to get that record from remote llog. Meanwhile there
is no such record on remote server, so it reads whole llog and
return it back properly but llog processing consider that as
incomplete llog due to network issues and retry endlessly.
Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Change-Id: Ib127fd0d1abd5289d90c7b4b3ca74ab6fc78bc71
Reviewed-on: https://review.whamcloud.com/47698
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
struct completion *lrd_started;
};
struct completion *lrd_started;
};
+static bool lod_recovery_abort(struct obd_device *top)
+{
+ return (top->obd_stopping || top->obd_abort_recovery ||
+ top->obd_abort_recov_mdt);
+}
/**
* process update recovery record
/**
* process update recovery record
PFID(&llh->lgh_id.lgl_oi.oi_fid), rec->lrh_index);
lut = lod2lu_dev(lrd->lrd_lod)->ld_site->ls_tgt;
PFID(&llh->lgh_id.lgl_oi.oi_fid), rec->lrh_index);
lut = lod2lu_dev(lrd->lrd_lod)->ld_site->ls_tgt;
- if (lut->lut_obd->obd_stopping ||
- lut->lut_obd->obd_abort_recovery)
+ if (lod_recovery_abort(lut->lut_obd))
return -ESHUTDOWN;
return insert_update_records_to_replay_list(lut->lut_tdtd,
return -ESHUTDOWN;
return insert_update_records_to_replay_list(lut->lut_tdtd,
struct lu_env *env = &lrd->lrd_env;
struct lu_target *lut;
struct lu_tgt_desc *mdt = NULL;
struct lu_env *env = &lrd->lrd_env;
struct lu_target *lut;
struct lu_tgt_desc *mdt = NULL;
+ struct lu_device *top_device;
time64_t start;
int retries = 0;
int rc;
time64_t start;
int retries = 0;
int rc;
} else {
rc = lod_sub_prep_llog(env, lod, dt, lrd->lrd_idx);
}
} else {
rc = lod_sub_prep_llog(env, lod, dt, lrd->lrd_idx);
}
if (!rc && !lod->lod_child->dd_rdonly) {
/* Process the recovery record */
ctxt = llog_get_context(dt->dd_lu_dev.ld_obd,
if (!rc && !lod->lod_child->dd_rdonly) {
/* Process the recovery record */
ctxt = llog_get_context(dt->dd_lu_dev.ld_obd,
lod_process_recovery_updates, lrd, 0, 0);
}
lod_process_recovery_updates, lrd, 0, 0);
}
- if (rc < 0) {
- struct lu_device *top_device;
-
- top_device = lod->lod_dt_dev.dd_lu_dev.ld_site->ls_top_dev;
- /*
- * Because the remote target might failover at the same time,
- * let's retry here
- */
- if ((rc == -ETIMEDOUT || rc == -EAGAIN || rc == -EIO) &&
- dt != lod->lod_child &&
- !top_device->ld_obd->obd_abort_recovery &&
- !top_device->ld_obd->obd_stopping) {
+ top_device = lod->lod_dt_dev.dd_lu_dev.ld_site->ls_top_dev;
+ if (rc < 0 && dt != lod->lod_child &&
+ !lod_recovery_abort(top_device->ld_obd)) {
+ if (rc == -EBADR) {
+ /* remote update llog is shorter than expected from
+ * local header. Cached copy could be de-synced during
+ * recovery, trust remote llog data
+ */
+ CDEBUG(D_HA, "%s update log data de-sync\n",
+ dt->dd_lu_dev.ld_obd->obd_name);
+ rc = 0;
+ } else if (rc == -ETIMEDOUT || rc == -EAGAIN || rc == -EIO) {
+ /*
+ * the remote target might failover at the same time,
+ * let's retry here
+ */
if (ctxt) {
if (ctxt->loc_handle)
if (ctxt) {
if (ctxt->loc_handle)
- llog_cat_close(env,
- ctxt->loc_handle);
+ llog_cat_close(env, ctxt->loc_handle);
}
retries++;
CDEBUG(D_HA, "%s get update log failed %d, retry\n",
dt->dd_lu_dev.ld_obd->obd_name, rc);
goto again;
}
}
retries++;
CDEBUG(D_HA, "%s get update log failed %d, retry\n",
dt->dd_lu_dev.ld_obd->obd_name, rc);
goto again;
}
+ llog_ctxt_put(ctxt);
+ if (rc < 0) {
CERROR("%s get update log failed: rc = %d\n",
dt->dd_lu_dev.ld_obd->obd_name, rc);
CERROR("%s get update log failed: rc = %d\n",
dt->dd_lu_dev.ld_obd->obd_name, rc);
spin_lock(&top_device->ld_obd->obd_dev_lock);
spin_lock(&top_device->ld_obd->obd_dev_lock);
- if (!top_device->ld_obd->obd_abort_recovery &&
- !top_device->ld_obd->obd_stopping)
- top_device->ld_obd->obd_abort_recovery = 1;
+ if (!lod_recovery_abort(top_device->ld_obd))
+ /* abort just MDT-MDT recovery */
+ top_device->ld_obd->obd_abort_recov_mdt = 1;
spin_unlock(&top_device->ld_obd->obd_dev_lock);
spin_unlock(&top_device->ld_obd->obd_dev_lock);
CDEBUG(D_HA, "%s retrieved update log, duration %lld, retries %d\n",
dt->dd_lu_dev.ld_obd->obd_name, ktime_get_real_seconds() - start,
CDEBUG(D_HA, "%s retrieved update log, duration %lld, retries %d\n",
dt->dd_lu_dev.ld_obd->obd_name, ktime_get_real_seconds() - start,
case OBD_IOC_ABORT_RECOVERY: {
struct obd_ioctl_data *data = karg;
case OBD_IOC_ABORT_RECOVERY: {
struct obd_ioctl_data *data = karg;
- CERROR("%s: Aborting recovery for device\n", mdt_obd_name(mdt));
if (data->ioc_type & OBD_FLG_ABORT_RECOV_MDT) {
if (data->ioc_type & OBD_FLG_ABORT_RECOV_MDT) {
+ CERROR("%s: Aborting MDT recovery\n",
+ mdt_obd_name(mdt));
obd->obd_abort_recov_mdt = 1;
wake_up(&obd->obd_next_transno_waitq);
} else { /* if (data->ioc_type & OBD_FLG_ABORT_RECOV_OST) */
/* lctl didn't set OBD_FLG_ABORT_RECOV_OST < 2.13.57 */
obd->obd_abort_recov_mdt = 1;
wake_up(&obd->obd_next_transno_waitq);
} else { /* if (data->ioc_type & OBD_FLG_ABORT_RECOV_OST) */
/* lctl didn't set OBD_FLG_ABORT_RECOV_OST < 2.13.57 */
+ CERROR("%s: Aborting client recovery\n",
+ mdt_obd_name(mdt));
obd->obd_abort_recovery = 1;
target_stop_recovery_thread(obd);
}
obd->obd_abort_recovery = 1;
target_stop_recovery_thread(obd);
}
*cur_offset = last_offset;
*cur_idx = last_idx;
}
*cur_offset = last_offset;
*cur_idx = last_idx;
}
+ /* being here means we reach end of llog but didn't find needed idx
+ * normally could happen while processing remote llog, return -EBADR
+ * to indicate access beyond end of file like dt_read() does and to
+ * distunguish this situation from real IO or network issues.
+ */
+ GOTO(out, rc = -EBADR);
out:
dt_read_unlock(env, o);
return rc;
out:
dt_read_unlock(env, o);
return rc;
* Parse non-ldiskfs options here. Rather than modifying
* ldiskfs, we just zero these out here
*/
* Parse non-ldiskfs options here. Rather than modifying
* ldiskfs, we just zero these out here
*/
- if (strncmp(s1, "abort_recov", 11) == 0) {
- lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
- clear++;
- } else if (strncmp(s1, "abort_recov_mdt", 15) == 0) {
+ if (!strncmp(s1, "abort_recov_mdt", 15) ||
+ !strncmp(s1, "abort_recovery_mdt", 18)) {
lmd->lmd_flags |= LMD_FLG_ABORT_RECOV_MDT;
clear++;
lmd->lmd_flags |= LMD_FLG_ABORT_RECOV_MDT;
clear++;
+ } else if (strncmp(s1, "abort_recov", 11) == 0) {
+ lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
+ clear++;
} else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
lmd->lmd_recovery_time_soft =
max_t(int, simple_strtoul(s1 + 19, NULL, 10),
} else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
lmd->lmd_recovery_time_soft =
max_t(int, simple_strtoul(s1 + 19, NULL, 10),