From cefabee52586f443bfd5163f6ac0b5e1b56a9db7 Mon Sep 17 00:00:00 2001 From: Alexander Zarochentsev Date: Wed, 15 Dec 2021 13:26:02 +0300 Subject: [PATCH] LU-15112 mgc: do not ignore target registration failure A serious target registration failure with the LDD_F_ERROR flag set is ignored by the target, which makes it possible to register a new target with an already used index. The writeconf flag should be encoded in the fs label regardless of the "first_time" flag; otherwise the target cannot be registered after an initial registration failure. HPE-bug-id: LUS-8752 Signed-off-by: Alexander Zarochentsev Change-Id: If051199d3dbafc8f8102f3daf086de01bc5c5f98 Reviewed-on: https://review.whamcloud.com/45259 Reviewed-by: Alexander Boyko Tested-by: jenkins Reviewed-by: Alexey Lyashkov Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/mgc/mgc_request.c | 15 +++++++++------ lustre/mgs/mgs_llog.c | 10 ++++++++-- lustre/obdclass/obd_mount_server.c | 10 ++++++---- lustre/tests/conf-sanity.sh | 17 +++++++++++++++++ lustre/utils/mkfs_lustre.c | 16 ++++++++++++---- 5 files changed, 52 insertions(+), 16 deletions(-) diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 000937c..13d1b6b 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -1184,12 +1184,15 @@ static int mgc_target_register(struct obd_export *exp, req->rq_delay_limit = MGC_TARGET_REG_LIMIT_MAX; rc = ptlrpc_queue_wait(req); - if (!rc) { - rep_mti = req_capsule_server_get(&req->rq_pill, - &RMF_MGS_TARGET_INFO); - memcpy(mti, rep_mti, sizeof(*rep_mti)); - CDEBUG(D_MGC, "register %s got index = %d\n", - mti->mti_svname, mti->mti_stripe_index); + if (ptlrpc_client_replied(req)) { + rep_mti = req_capsule_server_get(&req->rq_pill, + &RMF_MGS_TARGET_INFO); + if (rep_mti) + memcpy(mti, rep_mti, sizeof(*rep_mti)); + } + if (!rc) { + CDEBUG(D_MGC, "register %s got index = %d\n", + mti->mti_svname, mti->mti_stripe_index); } ptlrpc_req_finished(req); diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index 
36fc0d6..d20dcec 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -636,10 +636,16 @@ static bool server_make_name(u32 flags, u16 index, const char *fs, bool invalid_flag = false; if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) { + char reg_flag = '-'; + + if (flags & LDD_F_WRITECONF) + reg_flag = '='; + else if (flags & LDD_F_VIRGIN) + reg_flag = ':'; + if (!(flags & LDD_F_SV_ALL)) snprintf(name_buf, name_buf_size, "%.8s%c%s%04x", fs, - (flags & LDD_F_VIRGIN) ? ':' : - ((flags & LDD_F_WRITECONF) ? '=' : '-'), + reg_flag, (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST", index); } else if (flags & LDD_F_SV_TYPE_MGS) { diff --git a/lustre/obdclass/obd_mount_server.c b/lustre/obdclass/obd_mount_server.c index d82ef2a..cd4c721 100644 --- a/lustre/obdclass/obd_mount_server.c +++ b/lustre/obdclass/obd_mount_server.c @@ -1220,7 +1220,7 @@ static int server_register_target(struct lustre_sb_info *lsi) { struct obd_device *mgc = lsi->lsi_mgc; struct mgs_target_info *mti = NULL; - bool writeconf; + bool must_succeed; int rc; int tried = 0; ENTRY; @@ -1243,8 +1243,10 @@ static int server_register_target(struct lustre_sb_info *lsi) libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index, mti->mti_flags); - /* if write_conf is true, the registration must succeed */ - writeconf = !!(lsi->lsi_flags & (LDD_F_NEED_INDEX | LDD_F_UPDATE)); + /* we cannot ignore registration failure if MGS logs must be updated. */ + must_succeed = !!(lsi->lsi_flags & + (LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_WRITECONF | + LDD_F_VIRGIN)); mti->mti_flags |= LDD_F_OPC_REG; again: @@ -1260,7 +1262,7 @@ again: "%s: the MGS refuses to allow this server " "to start: rc = %d. Please see messages on " "the MGS.\n", lsi->lsi_svname, rc); - } else if (writeconf) { + } else if (must_succeed) { if ((rc == -ESHUTDOWN || rc == -EIO) && ++tried < 5) { /* The connection with MGS is not established. * Try again after 2 seconds. Interruptable. 
*/ diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index d65c75d..a45468a 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -9312,6 +9312,23 @@ test_128() } run_test 128 "Force using remote logs with --nolocallogs" +test_129() +{ + stopall + start_mds || error "MDS start failed" + format_ost 1 + start ost1 $(ostdevname 1) $OST_MOUNT_OPTS && + error "start ost1 should fail" || true + start ost1 $(ostdevname 1) $OST_MOUNT_OPTS && + error "second start ost1 should fail" || true + do_facet ost1 "$TUNEFS --writeconf $(ostdevname 1)" + start ost1 $(ostdevname 1) $OST_MOUNT_OPTS || + error "start ost1 failed" + stop ost1 + stop_mds +} +run_test 129 "attempt to connect an OST with the same index should fail" + test_130() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index 75979b9..8caf62d 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -227,12 +227,20 @@ static bool server_make_name(__u32 flags, __u16 index, const char *fs, bool invalid_flag = false; if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) { + char reg_flag = '-'; + + if (flags & LDD_F_WRITECONF) + reg_flag = '='; + else if (flags & LDD_F_VIRGIN) + reg_flag = ':'; + else if (flags & LDD_F_NO_LOCAL_LOGS) + reg_flag = '+'; + + if (!(flags & LDD_F_SV_ALL)) snprintf(name_buf, name_buf_size, "%.8s%c%s%04x", fs, - (flags & LDD_F_VIRGIN) ? ':' : - ((flags & LDD_F_WRITECONF) ? '=' : - ((flags & LDD_F_NO_LOCAL_LOGS) ? '+' : '-')), - (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST", + reg_flag, + (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST", index); } else if (flags & LDD_F_SV_TYPE_MGS) { snprintf(name_buf, name_buf_size, "MGS"); -- 1.8.3.1