From 79bfc74869e3f7b052874f4585399c5ba7f599e9 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Thu, 11 Jan 2018 23:27:19 +0800 Subject: [PATCH] LU-10045 obdclass: multiple try when register target It is possible that the connection between the MGC and the MGS has not been established yet when the target is registered with the MGS during server mount. At that time, ptlrpcd may be trying to (re-)connect to the MGS in the background. In that case, the mount process should not report failure (-ESHUTDOWN or -EIO); instead, it can retry the MGS_TARGET_REG RPC after some time (such as 2 seconds). Signed-off-by: Fan Yong Change-Id: I44e53a9d1de037907bdb5148b8c44d332439a50c Reviewed-on: https://review.whamcloud.com/30761 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Alex Zhuravlev Reviewed-by: Faccini Bruno Reviewed-by: Oleg Drokin --- lustre/obdclass/obd_mount_server.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lustre/obdclass/obd_mount_server.c b/lustre/obdclass/obd_mount_server.c index cbe627e..8441d5b 100644 --- a/lustre/obdclass/obd_mount_server.c +++ b/lustre/obdclass/obd_mount_server.c @@ -1187,6 +1187,7 @@ static int server_register_target(struct lustre_sb_info *lsi) struct mgs_target_info *mti = NULL; bool writeconf; int rc; + int tried = 0; ENTRY; LASSERT(mgc); @@ -1211,6 +1212,7 @@ static int server_register_target(struct lustre_sb_info *lsi) writeconf = !!(lsi->lsi_flags & (LDD_F_NEED_INDEX | LDD_F_UPDATE)); mti->mti_flags |= LDD_F_OPC_REG; +again: /* Register the target */ /* FIXME use mgc_process_config instead */ rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp, @@ -1224,6 +1226,17 @@ static int server_register_target(struct lustre_sb_info *lsi) "to start: rc = %d. Please see messages on " "the MGS.\n", lsi->lsi_svname, rc); } else if (writeconf) { + if ((rc == -ESHUTDOWN || rc == -EIO) && ++tried < 5) { + /* The connection with MGS is not established. + * Try again after 2 seconds. Interruptable. 
*/ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout( + msecs_to_jiffies(MSEC_PER_SEC) * 2); + set_current_state(TASK_RUNNING); + if (!signal_pending(current)) + goto again; + } + LCONSOLE_ERROR_MSG(0x15f, "%s: cannot register this server with the MGS: " "rc = %d. Is the MGS running?\n", -- 1.8.3.1