+ struct lod_recovery_data *lrd = arg;
+ struct lod_device *lod = lrd->lrd_lod;
+ struct dt_device *dt;
+ struct ptlrpc_thread *thread = lrd->lrd_thread;
+ struct llog_ctxt *ctxt = NULL;
+ struct lu_env env;
+ struct lu_target *lut;
+ struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
+ struct lod_tgt_desc *tgt = NULL;
+ time64_t start;
+ int retries = 0;
+ int i;
+ int rc;
+
+ ENTRY;
+
+ thread->t_flags = SVC_RUNNING;
+ wake_up(&thread->t_ctl_waitq);
+
+ rc = lu_env_init(&env, LCT_LOCAL | LCT_MD_THREAD);
+ if (rc != 0) {
+ OBD_FREE_PTR(lrd);
+ CERROR("%s: can't initialize env: rc = %d\n",
+ lod2obd(lod)->obd_name, rc);
+ RETURN(rc);
+ }
+
+ lut = lod2lu_dev(lod)->ld_site->ls_tgt;
+ atomic_inc(&lut->lut_tdtd->tdtd_recovery_threads_count);
+ if (!lrd->lrd_ltd)
+ dt = lod->lod_child;
+ else
+ dt = lrd->lrd_ltd->ltd_tgt;
+
+ start = ktime_get_real_seconds();
+
+again:
+ rc = lod_sub_prep_llog(&env, lod, dt, lrd->lrd_idx);
+ if (!rc && !lod->lod_child->dd_rdonly) {
+ /* Process the recovery record */
+ ctxt = llog_get_context(dt->dd_lu_dev.ld_obd,
+ LLOG_UPDATELOG_ORIG_CTXT);
+ LASSERT(ctxt != NULL);
+ LASSERT(ctxt->loc_handle != NULL);
+
+ rc = llog_cat_process(&env, ctxt->loc_handle,
+ lod_process_recovery_updates, lrd, 0, 0);
+ }
+
+ if (rc < 0) {
+ struct lu_device *top_device;
+
+ top_device = lod->lod_dt_dev.dd_lu_dev.ld_site->ls_top_dev;
+ /*
+ * Because the remote target might failover at the same time,
+ * let's retry here
+ */
+ if ((rc == -ETIMEDOUT || rc == -EAGAIN || rc == -EIO) &&
+ dt != lod->lod_child &&
+ !top_device->ld_obd->obd_abort_recovery &&
+ !top_device->ld_obd->obd_stopping) {
+ if (ctxt) {
+ if (ctxt->loc_handle)
+ llog_cat_close(&env,
+ ctxt->loc_handle);
+ llog_ctxt_put(ctxt);
+ }
+ retries++;
+ CDEBUG(D_HA, "%s get update log failed %d, retry\n",
+ dt->dd_lu_dev.ld_obd->obd_name, rc);
+ goto again;
+ }
+
+ CERROR("%s get update log failed: rc = %d\n",
+ dt->dd_lu_dev.ld_obd->obd_name, rc);
+ llog_ctxt_put(ctxt);
+
+ spin_lock(&top_device->ld_obd->obd_dev_lock);
+ if (!top_device->ld_obd->obd_abort_recovery &&
+ !top_device->ld_obd->obd_stopping)
+ top_device->ld_obd->obd_abort_recovery = 1;
+ spin_unlock(&top_device->ld_obd->obd_dev_lock);
+
+ GOTO(out, rc);
+ }
+ llog_ctxt_put(ctxt);
+
+ CDEBUG(D_HA, "%s retrieved update log, duration %lld, retries %d\n",
+ dt->dd_lu_dev.ld_obd->obd_name, ktime_get_real_seconds() - start,
+ retries);
+
+ spin_lock(&lod->lod_lock);
+ if (!lrd->lrd_ltd)
+ lod->lod_child_got_update_log = 1;
+ else
+ lrd->lrd_ltd->ltd_got_update_log = 1;
+
+ if (!lod->lod_child_got_update_log) {
+ spin_unlock(&lod->lod_lock);
+ GOTO(out, rc = 0);
+ }
+
+ cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) {
+ tgt = LTD_TGT(ltd, i);
+ if (!tgt->ltd_got_update_log) {
+ spin_unlock(&lod->lod_lock);
+ GOTO(out, rc = 0);
+ }
+ }
+ lut->lut_tdtd->tdtd_replay_ready = 1;
+ spin_unlock(&lod->lod_lock);
+
+ CDEBUG(D_HA, "%s got update logs from all MDTs.\n",
+ lut->lut_obd->obd_name);
+ wake_up(&lut->lut_obd->obd_next_transno_waitq);
+ EXIT;
+
+out:
+ OBD_FREE_PTR(lrd);
+ thread->t_flags = SVC_STOPPED;
+ atomic_dec(&lut->lut_tdtd->tdtd_recovery_threads_count);
+ wake_up(&lut->lut_tdtd->tdtd_recovery_threads_waitq);
+ wake_up(&thread->t_ctl_waitq);
+ lu_env_fini(&env);
+ return rc;
+}
+
+/**
+ * Tear down the update llog context of a sub device.
+ *
+ * When a recovery thread is supplied and is flagged SVC_RUNNING, it is
+ * switched to SVC_STOPPING, woken up, and waited for until it flags
+ * SVC_STOPPED. After that the LLOG_UPDATELOG_ORIG_CTXT context of the
+ * sub device's obd is looked up; if it exists, its loc_handle (when set)
+ * is closed with llog_cat_close() and the context is passed to
+ * llog_cleanup().
+ *
+ * \param[in] env	execution environment
+ * \param[in] dt	sub device whose update llog context is finished
+ * \param[in] thread	recovery thread on this sub device, may be NULL
+ */
+void lod_sub_fini_llog(const struct lu_env *env,
+		       struct dt_device *dt, struct ptlrpc_thread *thread)
+{
+	struct llog_ctxt *llog_ctx;
+	struct obd_device *sub_obd;
+
+	ENTRY;
+
+	sub_obd = dt->dd_lu_dev.ld_obd;
+	CDEBUG(D_INFO, "%s: finish sub llog\n", sub_obd->obd_name);
+
+	/* The recovery thread must be stopped before its llog goes away */
+	if (thread != NULL && (thread->t_flags & SVC_RUNNING) != 0) {
+		thread->t_flags = SVC_STOPPING;
+		wake_up(&thread->t_ctl_waitq);
+		wait_event(thread->t_ctl_waitq,
+			   (thread->t_flags & SVC_STOPPED) != 0);
+	}
+
+	/* Nothing left to release when the context was never set up */
+	llog_ctx = llog_get_context(sub_obd, LLOG_UPDATELOG_ORIG_CTXT);
+	if (llog_ctx != NULL) {
+		if (llog_ctx->loc_handle != NULL)
+			llog_cat_close(env, llog_ctx->loc_handle);
+
+		llog_cleanup(env, llog_ctx);
+	}
+
+	RETURN_EXIT;
+}
+
+/**
+ * Extract MDT target index from a device name.
+ *
+ * a helper function to extract index from the given device name
+ * like "fsname-MDTxxxx-mdtlov"
+ *
+ * \param[in] lodname device name
+ * \param[out] mdt_index extracted index
+ *
+ * \retval 0 on success
+ * \retval -EINVAL if the name is invalid
+ */
+int lodname2mdt_index(char *lodname, u32 *mdt_index)
+{
+ unsigned long index;