From: Andriy Skulysh Date: Fri, 5 Aug 2016 11:25:02 +0000 (+0300) Subject: LU-8359 ldlm: Wrong evict during failover X-Git-Tag: 2.9.58~28 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=c60e949e3b9f7ff19e1a644210cc764ee150ad8b;p=fs%2Flustre-release.git LU-8359 ldlm: Wrong evict during failover There is a race between setting obd_fail and OBD_OPT_FAILOVER. tgt_client_del() checks only OBD_OPT_FAILOVER, class_disconnect_export_list() is called with flags copied from obd, and umount can start while disconnect is in progress. It is better to rely only on obd_fail. We shouldn't evict during failover at all; it should be handled on a new server. Such a wrong eviction can happen when the server can't send a CP AST to the client because failover has already started. Change-Id: I649d35d180b2239fe558b375872d3805629968a9 Seagate-bug-id: MRP-3604 Signed-off-by: Andriy Skulysh Reviewed-on: https://review.whamcloud.com/21114 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Ben Evans Reviewed-by: Alexander Zarochentsev Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index 7299b68..7aa1dff 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -164,7 +164,6 @@ do { \ enum obd_option { OBD_OPT_FORCE = 0x0001, - OBD_OPT_FAILOVER = 0x0002, OBD_OPT_ABORT_RECOV = 0x0004, }; diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index 991586a..0fafed0 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -317,8 +317,7 @@ void class_disconnect_stale_exports(struct obd_device *, int (*test_export)(struct obd_export *)); static inline enum obd_option exp_flags_from_obd(struct obd_device *obd) { - return ((obd->obd_fail ? OBD_OPT_FAILOVER : 0) | - (obd->obd_force ? OBD_OPT_FORCE : 0) | + return ((obd->obd_force ? OBD_OPT_FORCE : 0) | (obd->obd_abort_recovery ? 
OBD_OPT_ABORT_RECOV : 0) | 0); } diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 20a2f8e..e281117 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -5502,7 +5502,7 @@ static int mdt_export_cleanup(struct obd_export *exp) rc = mdt_ctxt_add_dirty_flag(&env, info, mfd); /* Don't unlink orphan on failover umount, LU-184 */ - if (exp->exp_flags & OBD_OPT_FAILOVER) { + if (exp->exp_obd->obd_fail) { ma->ma_valid = MA_FLAGS; ma->ma_attr_flags |= MDS_KEEP_ORPHAN; } @@ -5511,9 +5511,7 @@ static int mdt_export_cleanup(struct obd_export *exp) } info->mti_mdt = NULL; /* cleanup client slot early */ - /* Do not erase record for recoverable client. */ - if (!(exp->exp_flags & OBD_OPT_FAILOVER) || exp->exp_failed) - tgt_client_del(&env, exp); + tgt_client_del(&env, exp); lu_env_fini(&env); RETURN(rc); diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index 33d6b4f..ebd74a8 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -436,9 +436,7 @@ int ofd_obd_disconnect(struct obd_export *exp) tgt_grant_discard(exp); - /* Do not erase record for recoverable client. */ - if (exp->exp_obd->obd_replayable && - (!exp->exp_obd->obd_fail || exp->exp_failed)) { + if (exp->exp_obd->obd_replayable) { rc = lu_env_init(&env, LCT_DT_THREAD); if (rc) GOTO(out, rc); diff --git a/lustre/target/tgt_lastrcvd.c b/lustre/target/tgt_lastrcvd.c index c7aecdf..3a55026 100644 --- a/lustre/target/tgt_lastrcvd.c +++ b/lustre/target/tgt_lastrcvd.c @@ -1079,6 +1079,10 @@ int tgt_client_del(const struct lu_env *env, struct obd_export *exp) RETURN(-EINVAL); } + /* Do not erase record for recoverable client. 
*/ + if (exp->exp_obd->obd_fail) + RETURN(0); + /* XXX if lcd_uuid were a real obd_uuid, I could use obd_uuid_equals */ if (!strcmp((char *)ted->ted_lcd->lcd_uuid, (char *)tgt->lut_obd->obd_uuid.uuid) || @@ -1105,9 +1109,6 @@ int tgt_client_del(const struct lu_env *env, struct obd_export *exp) LBUG(); } - /* Do not erase record for recoverable client. */ - if (exp->exp_flags & OBD_OPT_FAILOVER) - RETURN(0); if (OBD_FAIL_CHECK(OBD_FAIL_TGT_CLIENT_DEL)) RETURN(0);