Whamcloud - gitweb
LU-17357 mgc: wait for sptlrpc config log 23/53423/19
author Sebastien Buisson <sbuisson@ddn.com>
Tue, 12 Dec 2023 16:49:49 +0000 (17:49 +0100)
committer Oleg Drokin <green@whamcloud.com>
Fri, 23 Feb 2024 07:13:27 +0000 (07:13 +0000)
The sptlrpc config log is mandatory to establish connections to
targets with proper security context. So wait for its retrieval.

Add sanity-sec test_68 to exercise this, and improve test_32
for mgssec.

Signed-off-by: Sebastien Buisson <sbuisson@ddn.com>
Change-Id: I5352e926dc6a9a68db1224629c68a42b74bee8a4
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53423
Reviewed-by: Mikhail Pershin <mpershin@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/include/obd_class.h
lustre/include/obd_support.h
lustre/mgc/mgc_request.c
lustre/ptlrpc/niobuf.c
lustre/tests/sanity-sec.sh

index 40e4354..8ea0f75 100644 (file)
@@ -278,7 +278,8 @@ struct config_llog_data {
        enum mgs_cfg_type           cld_type;
        unsigned int                cld_stopping:1, /* we were told to stop
                                                     * watching */
-                                   cld_lostlock:1; /* lock not requeued */
+                                   cld_lostlock:1, /* lock not requeued */
+                                   cld_processed:1;  /* successfully fetched */
        char                        cld_logname[0];
 };
 
index a2e593e..8036c6a 100644 (file)
@@ -469,6 +469,7 @@ extern bool obd_enable_health_write;
 #define OBD_FAIL_PTLRPC_DROP_BULK       0x51a
 #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK  0x51b
 #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
+#define OBD_FAIL_PTLRPC_DROP_MGS         0x51d
 #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3  0x520
 #define OBD_FAIL_PTLRPC_BULK_ATTACH      0x521
 #define OBD_FAIL_PTLRPC_BULK_REPLY_ATTACH      0x522
index d957f9e..cb65855 100644 (file)
@@ -1640,7 +1640,7 @@ static int mgc_process_cfg_log(struct obd_device *mgc,
        if (rc && rc != -ENOENT)
 #else
        if (local_only)
-               GOTO(out_pop, rc);
+               GOTO(out_pop, rc = -EIO);
 #endif
                rc = class_config_parse_llog(env, ctxt, cld->cld_logname,
                                             &cld->cld_cfg);
@@ -1804,6 +1804,10 @@ restart:
 
        CDEBUG(D_MGC, "%s: configuration from log '%s' %sed (%d).\n",
               mgc->obd_name, cld->cld_logname, rc ? "fail" : "succeed", rc);
+       if (rc != -ETIMEDOUT && rc != -EIO && rc != -EAGAIN) {
+               cld->cld_processed = 1;
+               wake_up(&rq_waitq);
+       }
 
        /* Now drop the lock so MGS can revoke it */
        if (!rcl) {
@@ -1868,6 +1872,22 @@ static int mgc_process_config(struct obd_device *obd, size_t len, void *buf)
                        break;
                }
 
+               /* if it exists, the sptlrpc config log really needs to be
+                * correctly processed before processing other logs,
+                * otherwise client might use incorrect sec flavor
+                */
+               if (cld->cld_sptlrpc && !cld->cld_sptlrpc->cld_processed) {
+                       unsigned int timeout = 120;
+
+                       /* we do not want to wait forever,
+                        * we prefer a (excessively) long timeout
+                        */
+                       timeout = max(20 * mgc_requeue_timeout_min, timeout);
+                       wait_event_idle_timeout(rq_waitq,
+                                               cld->cld_sptlrpc->cld_processed,
+                                               cfs_time_seconds(timeout));
+               }
+
                rc = mgc_process_log(obd, cld);
                if (rc == 0 && cld->cld_recover != NULL) {
                        if (OCD_HAS_FLAG(&obd->u.cli.cl_import->
index 86ca9f1..0c72605 100644 (file)
@@ -737,6 +737,13 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                RETURN(0);
        }
 
+       if (unlikely(CFS_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DROP_MGS, cfs_fail_val) &&
+                    lustre_msg_get_opc(request->rq_reqmsg) == MGS_CONNECT)) {
+               DEBUG_REQ(D_INFO, request, "Simulate MGS connect failure");
+               RETURN(0);
+       }
+
+
        LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
        LASSERT(request->rq_wait_ctx == 0);
 
index 0b7fc17..2f1655d 100755 (executable)
@@ -2539,6 +2539,20 @@ cleanup_32() {
                start_gss_daemons $mds_HOST $LSVCGSSD "-vvv -s -m -o -z"
        fi
 
+       # restore MGS NIDs in key on MGS
+       do_nodes $mgs_HOST "$LGSS_SK -g $MGSNID -m \
+                               $SK_PATH/$FSNAME.key >/dev/null 2>&1" ||
+               error "could not modify keyfile on MGS (3)"
+
+       # load modified key file on MGS
+       do_nodes $mgs_HOST "$LGSS_SK -l $SK_PATH/$FSNAME.key >/dev/null 2>&1" ||
+               error "could not load keyfile on MGS (3)"
+
+       # restore MGS NIDs in key on client
+       do_nodes ${clients_arr[0]} "$LGSS_SK -g $MGSNID -m \
+                               $SK_PATH/$FSNAME.key >/dev/null 2>&1" ||
+               error "could not modify keyfile on client (3)"
+
        # re-mount client
        MOUNT_OPTS=$(add_sk_mntflag $MOUNT_OPTS)
        mountcli
@@ -2547,6 +2561,9 @@ cleanup_32() {
 }
 
 test_32() {
+       local mgsnid2=$(host_nids_address $ost1_HOST $NETTYPE)@${MGSNID#*@}
+       local mgsorig=$MGSNID
+
        if ! $SHARED_KEY; then
                skip "need shared key feature for this test"
        fi
@@ -2564,7 +2581,7 @@ test_32() {
                umount_client $MOUNT2 || error "umount $MOUNT2 failed"
        fi
        if $(grep -q $MOUNT' ' /proc/mounts); then
-       umount_client $MOUNT || error "umount $MOUNT failed"
+               umount_client $MOUNT || error "umount $MOUNT failed"
        fi
 
        # kill daemon on MGS to start afresh
@@ -2580,16 +2597,16 @@ test_32() {
        # add mgs key type and MGS NIDs in key on MGS
        do_nodes $mgs_HOST "$LGSS_SK -t mgs,server -g $MGSNID -m \
                                $SK_PATH/$FSNAME.key >/dev/null 2>&1" ||
-               error "could not modify keyfile on MGS"
+               error "could not modify keyfile on MGS (1)"
 
        # load modified key file on MGS
        do_nodes $mgs_HOST "$LGSS_SK -l $SK_PATH/$FSNAME.key >/dev/null 2>&1" ||
-               error "could not load keyfile on MGS"
+               error "could not load keyfile on MGS (1)"
 
        # add MGS NIDs in key on client
        do_nodes ${clients_arr[0]} "$LGSS_SK -g $MGSNID -m \
                                $SK_PATH/$FSNAME.key >/dev/null 2>&1" ||
-               error "could not modify keyfile on MGS"
+               error "could not modify keyfile on client (1)"
 
        # set perms for per-nodemap keys else permission denied
        do_nodes $(comma_list $(all_nodes)) \
@@ -2599,6 +2616,7 @@ test_32() {
 
        # re-mount client with mgssec=skn
        save_opts=$MOUNT_OPTS
+       stack_trap "MOUNT_OPTS=$save_opts" EXIT
        if [ -z "$MOUNT_OPTS" ]; then
                MOUNT_OPTS="-o mgssec=skn"
        else
@@ -2639,9 +2657,30 @@ test_32() {
        fi
        zconf_mount_clients ${clients_arr[0]} $MOUNT $MOUNT_OPTS ||
                error "mount ${clients_arr[0]} with mgssec=ska failed"
-       MOUNT_OPTS=$save_opts
 
-       exit 0
+       MGSNID=$mgsnid2:$mgsorig
+       stack_trap "MGSNID=$mgsorig" EXIT
+
+       # umount client
+       zconf_umount_clients ${clients_arr[0]} $MOUNT ||
+               error "umount ${clients_arr[0]} failed"
+
+       # add MGS NIDs in key on MGS
+       do_nodes $mgs_HOST "$LGSS_SK -g ${MGSNID//:/,} -m \
+                               $SK_PATH/$FSNAME.key >/dev/null 2>&1" ||
+               error "could not modify keyfile on MGS (2)"
+
+       # load modified key file on MGS
+       do_nodes $mgs_HOST "$LGSS_SK -l $SK_PATH/$FSNAME.key >/dev/null 2>&1" ||
+               error "could not load keyfile on MGS (2)"
+
+       # add MGS NIDs in key on client
+       do_nodes ${clients_arr[0]} "$LGSS_SK -g ${MGSNID//:/,} -m \
+                               $SK_PATH/$FSNAME.key >/dev/null 2>&1" ||
+               error "could not modify keyfile on client (2)"
+
+       zconf_mount_clients ${clients_arr[0]} $MOUNT $MOUNT_OPTS ||
+               error "mount ${clients_arr[0]} with alternate mgsnid failed"
 }
 run_test 32 "check for mgssec"
 
@@ -6023,6 +6062,34 @@ test_65() {
 }
 run_test 65 "lfs find -printf %La and --attrs support"
 
+# Undo test_68: clear the fault injection, re-mount $MOUNT, and re-mount
+# $MOUNT2 only when test_68 recorded that it unmounted it.
+cleanup_68() {
+       lctl set_param fail_loc=0 fail_val=0
+       mount_client $MOUNT ${MOUNT_OPTS} || error "re-mount $MOUNT failed"
+       # Checking is_mounted here would always be false for a mount point
+       # that test_68 unmounted, so rely on the recorded state instead.
+       if [[ "$test68_remount2" == "true" ]]; then
+               mount_client $MOUNT2 ${MOUNT_OPTS} ||
+                       error "re-mount $MOUNT2 failed"
+       fi
+}
+
+# Mount the client while the OBD_FAIL_PTLRPC_DROP_MGS fail_loc is armed
+# (fail_val=20) and check that the mount still succeeds.
+test_68() {
+       # Deliberately not 'local': cleanup_68 runs from the EXIT trap and
+       # must still be able to read this flag.
+       test68_remount2=false
+       stack_trap cleanup_68 EXIT
+
+       # unmount client completely
+       umount_client $MOUNT || error "umount $MOUNT failed"
+       if is_mounted $MOUNT2; then
+               umount_client $MOUNT2 || error "umount $MOUNT2 failed"
+               # remember to restore the second mount point on cleanup
+               test68_remount2=true
+       fi
+
+       #define CFS_FAIL_ONCE|OBD_FAIL_PTLRPC_DROP_MGS    0x51d
+       lctl set_param fail_loc=0x8000051d fail_val=20
+
+       zconf_mount_clients $HOSTNAME $MOUNT $MOUNT_OPTS ||
+               error "mount failed"
+
+       umount_client $MOUNT || error "re-umount $MOUNT failed"
+}
+run_test 68 "all config logs are processed"
+
 log "cleanup: ======================================================"
 
 sec_unsetup() {