From: Sebastien Buisson Date: Tue, 12 Dec 2023 16:49:49 +0000 (+0100) Subject: LU-17357 mgc: wait for sptlrpc config log X-Git-Tag: 2.15.62~192 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=4a3e428361a03b4bc777eddd466ba1ff8b72b51e;p=fs%2Flustre-release.git LU-17357 mgc: wait for sptlrpc config log The sptlrpc config log is mandatory to establish connections to targets with proper security context. So wait for its retrieval. Add sanity-sec test_68 to exercise this, and improve test_32 for mgssec. Signed-off-by: Sebastien Buisson Change-Id: I5352e926dc6a9a68db1224629c68a42b74bee8a4 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53423 Reviewed-by: Mikhail Pershin Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index 40e4354..8ea0f75 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -278,7 +278,8 @@ struct config_llog_data { enum mgs_cfg_type cld_type; unsigned int cld_stopping:1, /* we were told to stop * watching */ - cld_lostlock:1; /* lock not requeued */ + cld_lostlock:1, /* lock not requeued */ + cld_processed:1; /* successfully fetched */ char cld_logname[0]; }; diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index a2e593e..8036c6a 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -469,6 +469,7 @@ extern bool obd_enable_health_write; #define OBD_FAIL_PTLRPC_DROP_BULK 0x51a #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c +#define OBD_FAIL_PTLRPC_DROP_MGS 0x51d #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3 0x520 #define OBD_FAIL_PTLRPC_BULK_ATTACH 0x521 #define OBD_FAIL_PTLRPC_BULK_REPLY_ATTACH 0x522 diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index d957f9e..cb65855 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -1640,7 +1640,7 @@ static int mgc_process_cfg_log(struct obd_device *mgc, if (rc && rc != -ENOENT) #else if (local_only) - GOTO(out_pop, rc); + GOTO(out_pop, rc = -EIO); #endif rc = class_config_parse_llog(env, ctxt, cld->cld_logname, &cld->cld_cfg); @@ -1804,6 +1804,10 @@ restart: CDEBUG(D_MGC, "%s: configuration from log '%s' %sed (%d).\n", mgc->obd_name, cld->cld_logname, rc ? "fail" : "succeed", rc); + if (rc != -ETIMEDOUT && rc != -EIO && rc != -EAGAIN) { + cld->cld_processed = 1; + wake_up(&rq_waitq); + } /* Now drop the lock so MGS can revoke it */ if (!rcl) { @@ -1868,6 +1872,22 @@ static int mgc_process_config(struct obd_device *obd, size_t len, void *buf) break; } + /* if it exists, the sptlrpc config log really needs to be + * correctly processed before processing other logs, + * otherwise client might use incorrect sec flavor + */ + if (cld->cld_sptlrpc && !cld->cld_sptlrpc->cld_processed) { + unsigned int timeout = 120; + + /* we do not want to wait forever, + * we prefer a (excessively) long timeout + */ + timeout = max(20 * mgc_requeue_timeout_min, timeout); + wait_event_idle_timeout(rq_waitq, + cld->cld_sptlrpc->cld_processed, + cfs_time_seconds(timeout)); + } + rc = mgc_process_log(obd, cld); if (rc == 0 && cld->cld_recover != NULL) { if (OCD_HAS_FLAG(&obd->u.cli.cl_import-> diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 86ca9f1..0c72605 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -737,6 +737,13 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) RETURN(0); } + if (unlikely(CFS_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DROP_MGS, cfs_fail_val) && + lustre_msg_get_opc(request->rq_reqmsg) == MGS_CONNECT)) { + DEBUG_REQ(D_INFO, request, "Simulate MGS connect failure"); + RETURN(0); + } + + LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST); LASSERT(request->rq_wait_ctx == 0); diff --git a/lustre/tests/sanity-sec.sh b/lustre/tests/sanity-sec.sh index 0b7fc17..2f1655d 100755 --- a/lustre/tests/sanity-sec.sh +++ b/lustre/tests/sanity-sec.sh @@ -2539,6 +2539,20 @@ cleanup_32() { start_gss_daemons $mds_HOST $LSVCGSSD "-vvv -s -m -o -z" fi + # restore MGS NIDs in key on MGS + do_nodes $mgs_HOST "$LGSS_SK -g $MGSNID -m \ + $SK_PATH/$FSNAME.key >/dev/null 2>&1" || + error "could not modify keyfile on MGS (3)" + + # load modified key file on MGS + do_nodes $mgs_HOST "$LGSS_SK -l $SK_PATH/$FSNAME.key >/dev/null 2>&1" || + error "could not load keyfile on MGS (3)" + + # restore MGS NIDs in key on client + do_nodes ${clients_arr[0]} "$LGSS_SK -g $MGSNID -m \ + $SK_PATH/$FSNAME.key >/dev/null 2>&1" || + error "could not modify keyfile on client (3)" + # re-mount client MOUNT_OPTS=$(add_sk_mntflag $MOUNT_OPTS) mountcli @@ -2547,6 +2561,9 @@ cleanup_32() { } test_32() { + local mgsnid2=$(host_nids_address $ost1_HOST $NETTYPE)@${MGSNID#*@} + local mgsorig=$MGSNID + if ! $SHARED_KEY; then skip "need shared key feature for this test" fi @@ -2564,7 +2581,7 @@ test_32() { umount_client $MOUNT2 || error "umount $MOUNT2 failed" fi if $(grep -q $MOUNT' ' /proc/mounts); then - umount_client $MOUNT || error "umount $MOUNT failed" + umount_client $MOUNT || error "umount $MOUNT failed" fi # kill daemon on MGS to start afresh @@ -2580,16 +2597,16 @@ test_32() { # add mgs key type and MGS NIDs in key on MGS do_nodes $mgs_HOST "$LGSS_SK -t mgs,server -g $MGSNID -m \ $SK_PATH/$FSNAME.key >/dev/null 2>&1" || - error "could not modify keyfile on MGS" + error "could not modify keyfile on MGS (1)" # load modified key file on MGS do_nodes $mgs_HOST "$LGSS_SK -l $SK_PATH/$FSNAME.key >/dev/null 2>&1" || - error "could not load keyfile on MGS" + error "could not load keyfile on MGS (1)" # add MGS NIDs in key on client do_nodes ${clients_arr[0]} "$LGSS_SK -g $MGSNID -m \ $SK_PATH/$FSNAME.key >/dev/null 2>&1" || - error "could not modify keyfile on MGS" + error "could not modify keyfile on client (1)" # set perms for per-nodemap keys else permission denied do_nodes $(comma_list $(all_nodes)) \ @@ -2599,6 +2616,7 @@ test_32() { # re-mount client with mgssec=skn save_opts=$MOUNT_OPTS + stack_trap "MOUNT_OPTS=$save_opts" EXIT if [ -z "$MOUNT_OPTS" ]; then MOUNT_OPTS="-o mgssec=skn" else @@ -2639,9 +2657,30 @@ test_32() { fi zconf_mount_clients ${clients_arr[0]} $MOUNT $MOUNT_OPTS || error "mount ${clients_arr[0]} with mgssec=ska failed" - MOUNT_OPTS=$save_opts - exit 0 + MGSNID=$mgsnid2:$mgsorig + stack_trap "MGSNID=$mgsorig" EXIT + + # umount client + zconf_umount_clients ${clients_arr[0]} $MOUNT || + error "umount ${clients_arr[0]} failed" + + # add MGS NIDs in key on MGS + do_nodes $mgs_HOST "$LGSS_SK -g ${MGSNID//:/,} -m \ + $SK_PATH/$FSNAME.key >/dev/null 2>&1" || + error "could not modify keyfile on MGS (2)" + + # load modified key file on MGS + do_nodes $mgs_HOST "$LGSS_SK -l $SK_PATH/$FSNAME.key >/dev/null 2>&1" || + error "could not load keyfile on MGS (2)" + + # add MGS NIDs in key on client + do_nodes ${clients_arr[0]} "$LGSS_SK -g ${MGSNID//:/,} -m \ + $SK_PATH/$FSNAME.key >/dev/null 2>&1" || + error "could not modify keyfile on client (2)" + + zconf_mount_clients ${clients_arr[0]} $MOUNT $MOUNT_OPTS || + error "mount ${clients_arr[0]} with alternate mgsnid failed" } run_test 32 "check for mgssec" @@ -6023,6 +6062,34 @@ test_65() { } run_test 65 "lfs find -printf %La and --attrs support" +cleanup_68() { + lctl set_param fail_loc=0 fail_val=0 + mount_client $MOUNT ${MOUNT_OPTS} || error "re-mount $MOUNT failed" + if is_mounted $MOUNT2; then + mount_client $MOUNT2 ${MOUNT_OPTS} || + error "re-mount $MOUNT2 failed" + fi +} + +test_68() { + stack_trap cleanup_68 EXIT + + # unmount client completely + umount_client $MOUNT || error "umount $MOUNT failed" + if is_mounted $MOUNT2; then + umount_client $MOUNT2 || error "umount $MOUNT2 failed" + fi + + #define CFS_FAIL_ONCE|OBD_FAIL_PTLRPC_DROP_MGS 0x51d + lctl set_param fail_loc=0x8000051d fail_val=20 + + zconf_mount_clients $HOSTNAME $MOUNT $MOUNT_OPTS || + error "mount failed" + + umount_client $MOUNT || error "re-umount $MOUNT failed" +} +run_test 68 "all config logs are processed" + log "cleanup: ======================================================" sec_unsetup() {