From 8248c89371b8d156ab17ea24de5712f7242cdf9e Mon Sep 17 00:00:00 2001 From: Di Wang Date: Thu, 8 Oct 2015 01:09:04 -0700 Subject: [PATCH] LU-7277 lod: keep trying to get remote update log Because the remote MDT might be in recovery at the same time, let's keep trying to get the remote update log until the recovery is aborted. Signed-off-by: Di Wang Change-Id: Id9543201ce543be730e73f9f51f3f7a0d10d3dfc Reviewed-on: http://review.whamcloud.com/16786 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- lustre/lod/lod_dev.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c index 70dd8fb..9a8f0d9 100644 --- a/lustre/lod/lod_dev.c +++ b/lustre/lod/lod_dev.c @@ -374,6 +374,7 @@ static int lod_sub_recovery_thread(void *arg) else dt = lrd->lrd_ltd->ltd_tgt; +again: rc = lod_sub_prep_llog(&env, lod, dt, lrd->lrd_idx); if (rc != 0) GOTO(out, rc); @@ -388,6 +389,15 @@ static int lod_sub_recovery_thread(void *arg) llog_ctxt_put(ctxt); if (rc < 0) { + struct lu_device *top_device; + + top_device = lod->lod_dt_dev.dd_lu_dev.ld_site->ls_top_dev; + /* Because the remote target might failover at the same time, + * let's retry here */ + if (rc == -ETIMEDOUT && dt != lod->lod_child && + !top_device->ld_obd->obd_force_abort_recovery) + goto again; + CERROR("%s getting update log failed: rc = %d\n", dt->dd_lu_dev.ld_obd->obd_name, rc); GOTO(out, rc); -- 1.8.3.1