Whamcloud - gitweb
LU-17484 gss: reply error for SEC_CTX_INIT on wrong node
authorSebastien Buisson <sbuisson@ddn.com>
Thu, 8 Feb 2024 12:44:21 +0000 (13:44 +0100)
committerAndreas Dilger <adilger@whamcloud.com>
Sat, 24 Feb 2024 03:46:45 +0000 (03:46 +0000)
When a server receives a SEC_CTX_INIT request for a target that is not
available (either stopping, or not set up yet, or moved to a failover
node), the request gets dropped. This makes the client-side RPC time
out, increasing the time it takes to establish a proper gss context
with the target, because it slows down the HA mechanism that tries
alternate failover NIDs.
Instead of dropping the request reply for SEC_CTX_INIT, the server
needs to send back a proper error reply. The client will then be able
to immediately try alternate failover NIDs, speeding mount/reconnect
process up, and avoiding potential eviction.

Lustre-change: https://review.whamcloud.com/53970
Lustre-commit: 3d635dd3f24421c181aca5673cd81ed8f3e2c622

Test-Parameters: trivial
Test-Parameters: kerberos=true testlist=sanity-krb5
Test-Parameters: testgroup=review-dne-selinux-ssk-part-2
Signed-off-by: Sebastien Buisson <sbuisson@ddn.com>
Change-Id: Id2cefaa7d54729a63c7be13b65d7ace579bcaa78
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/54157
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/ldlm/ldlm_lib.c
lustre/ptlrpc/gss/lproc_gss.c
lustre/ptlrpc/gss/sec_gss.c
lustre/ptlrpc/import.c
lustre/utils/gss/svcgssd_proc.c

index b89b5c1..027e7bf 100644 (file)
@@ -1134,8 +1134,9 @@ int target_handle_connect(struct ptlrpc_request *req)
        if (!target) {
                deuuidify(str, NULL, &target_start, &target_len);
                LCONSOLE_ERROR_MSG(0x137,
-                                  "%s: not available for connect from %s (no target). If you are running an HA pair check that the target is mounted on the other server.\n",
-                                  str, libcfs_nid2str(req->rq_peer.nid));
+                                  "%.*s: not available for connect from %s (no target). If you are running an HA pair check that the target is mounted on the other server.\n",
+                                  target_len, target_start,
+                                  libcfs_nid2str(req->rq_peer.nid));
                GOTO(out, rc = -ENODEV);
        }
 
index ed29f3e..0fbd032 100644 (file)
@@ -269,6 +269,13 @@ again:
        rc = upcall_cache_downcall(rsicache, param->sid_err,
                                   param->sid_hash, param);
 
+       /* The caller, i.e. the userspace process writing to rsi_info, only
+        * needs to know about invalid values. Other errors are processed
+        * directly in the kernel.
+        */
+       if (rc != -EINVAL)
+               rc = 0;
+
 out:
        if (param != NULL)
                OBD_FREE_LARGE(param, size);
index e35a0fb..489646f 100644 (file)
@@ -1977,120 +1977,151 @@ int gss_pack_err_notify(struct ptlrpc_request *req, __u32 major, __u32 minor)
         RETURN(0);
 }
 
-static
-int gss_svc_handle_init(struct ptlrpc_request *req,
-                        struct gss_wire_ctx *gw)
-{
-        struct gss_svc_reqctx     *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
-        struct lustre_msg         *reqbuf = req->rq_reqbuf;
-        struct obd_uuid           *uuid;
-        struct obd_device         *target;
-        rawobj_t                   uuid_obj, rvs_hdl, in_token;
-        __u32                      lustre_svc;
-        __u32                     *secdata, seclen;
-        int                        swabbed, rc;
-        ENTRY;
+static int gss_svc_handle_init(struct ptlrpc_request *req,
+                              struct gss_wire_ctx *gw)
+{
+       struct gss_svc_reqctx *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
+       struct lustre_msg *reqbuf = req->rq_reqbuf;
+       struct obd_uuid *uuid;
+       struct obd_device *target;
+       rawobj_t uuid_obj, rvs_hdl, in_token;
+       __u32 lustre_svc;
+       __u32 *secdata, seclen;
+       int swabbed, rc;
 
-        CDEBUG(D_SEC, "processing gss init(%d) request from %s\n", gw->gw_proc,
-               libcfs_nid2str(req->rq_peer.nid));
+       ENTRY;
+       CDEBUG(D_SEC, "processing gss init(%d) request from %s\n", gw->gw_proc,
+              libcfs_nid2str(req->rq_peer.nid));
 
-        req->rq_ctx_init = 1;
+       req->rq_ctx_init = 1;
 
-        if (gw->gw_flags & LUSTRE_GSS_PACK_BULK) {
-                CERROR("unexpected bulk flag\n");
-                RETURN(SECSVC_DROP);
-        }
+       if (gw->gw_flags & LUSTRE_GSS_PACK_BULK) {
+               rc = SECSVC_DROP;
+               CDEBUG(D_SEC, "unexpected bulk flag: rc = %d\n", rc);
+               RETURN(rc);
+       }
 
-        if (gw->gw_proc == PTLRPC_GSS_PROC_INIT && gw->gw_handle.len != 0) {
-                CERROR("proc %u: invalid handle length %u\n",
-                       gw->gw_proc, gw->gw_handle.len);
-                RETURN(SECSVC_DROP);
-        }
+       if (gw->gw_proc == PTLRPC_GSS_PROC_INIT && gw->gw_handle.len != 0) {
+               rc = SECSVC_DROP;
+               CDEBUG(D_SEC, "proc %u: invalid handle length %u: rc = %d\n",
+                      gw->gw_proc, gw->gw_handle.len, rc);
+               RETURN(rc);
+       }
 
-        if (reqbuf->lm_bufcount < 3 || reqbuf->lm_bufcount > 4){
-                CERROR("Invalid bufcount %d\n", reqbuf->lm_bufcount);
-                RETURN(SECSVC_DROP);
-        }
+       if (reqbuf->lm_bufcount < 3 || reqbuf->lm_bufcount > 4){
+               rc = SECSVC_DROP;
+               CDEBUG(D_SEC, "Invalid bufcount %d: rc = %d\n",
+                      reqbuf->lm_bufcount, rc);
+               RETURN(rc);
+       }
 
        swabbed = req_capsule_req_need_swab(&req->rq_pill);
 
-        /* ctx initiate payload is in last segment */
-        secdata = lustre_msg_buf(reqbuf, reqbuf->lm_bufcount - 1, 0);
-        seclen = reqbuf->lm_buflens[reqbuf->lm_bufcount - 1];
+       /* ctx initiate payload is in last segment */
+       secdata = lustre_msg_buf(reqbuf, reqbuf->lm_bufcount - 1, 0);
+       seclen = reqbuf->lm_buflens[reqbuf->lm_bufcount - 1];
 
-        if (seclen < 4 + 4) {
-                CERROR("sec size %d too small\n", seclen);
-                RETURN(SECSVC_DROP);
-        }
+       if (seclen < 4 + 4) {
+               rc = SECSVC_DROP;
+               CDEBUG(D_SEC, "sec size %d too small: rc = %d\n", seclen, rc);
+               RETURN(rc);
+       }
 
-        /* lustre svc type */
-        lustre_svc = le32_to_cpu(*secdata++);
-        seclen -= 4;
+       /* lustre svc type */
+       lustre_svc = le32_to_cpu(*secdata++);
+       seclen -= 4;
 
-        /* extract target uuid, note this code is somewhat fragile
-         * because touched internal structure of obd_uuid */
-        if (rawobj_extract(&uuid_obj, &secdata, &seclen)) {
-                CERROR("failed to extract target uuid\n");
-                RETURN(SECSVC_DROP);
-        }
-        uuid_obj.data[uuid_obj.len - 1] = '\0';
+       /* extract target uuid, note this code is somewhat fragile
+        * because touched internal structure of obd_uuid */
+       if (rawobj_extract(&uuid_obj, &secdata, &seclen)) {
+               rc = SECSVC_DROP;
+               CDEBUG(D_SEC, "failed to extract target uuid: rc = %d\n", rc);
+               RETURN(rc);
+       }
+       uuid_obj.data[uuid_obj.len - 1] = '\0';
+
+       uuid = (struct obd_uuid *) uuid_obj.data;
+       target = class_uuid2obd(uuid);
+       if (!target || target->obd_stopping || !target->obd_set_up) {
+               char *target_start;
+               int target_len;
+
+               if (gss_pack_err_notify(req, GSS_S_NO_CONTEXT, 0) == 0)
+                       rc = SECSVC_COMPLETE;
+               else
+                       rc = SECSVC_DROP;
+
+               deuuidify(uuid->uuid, NULL, &target_start, &target_len);
+               LCONSOLE_ERROR("%.*s: not available for GSS context init from %s (%s).\n",
+                              target_len, target_start,
+                              libcfs_nid2str(req->rq_peer.nid),
+                              target ?
+                              (target->obd_stopping ?
+                               "stopping" : "not set up") :
+                              "no target");
+               RETURN(rc);
+       }
 
-        uuid = (struct obd_uuid *) uuid_obj.data;
-        target = class_uuid2obd(uuid);
-        if (!target || target->obd_stopping || !target->obd_set_up) {
-                CERROR("target '%s' is not available for context init (%s)\n",
-                       uuid->uuid, target == NULL ? "no target" :
-                       (target->obd_stopping ? "stopping" : "not set up"));
-                RETURN(SECSVC_DROP);
-        }
+       /* extract reverse handle */
+       if (rawobj_extract(&rvs_hdl, &secdata, &seclen)) {
+               rc = SECSVC_DROP;
+               CDEBUG(D_SEC, "%s: failed extract reverse handle: rc = %d\n",
+                      target->obd_name, rc);
+               RETURN(rc);
+       }
 
-        /* extract reverse handle */
-        if (rawobj_extract(&rvs_hdl, &secdata, &seclen)) {
-                CERROR("failed extract reverse handle\n");
-                RETURN(SECSVC_DROP);
-        }
+       /* extract token */
+       if (rawobj_extract(&in_token, &secdata, &seclen)) {
+               rc = SECSVC_DROP;
+               CDEBUG(D_SEC, "%s: can't extract token: rc = %d\n",
+                      target->obd_name, rc);
+               RETURN(rc);
+       }
 
-        /* extract token */
-        if (rawobj_extract(&in_token, &secdata, &seclen)) {
-                CERROR("can't extract token\n");
-                RETURN(SECSVC_DROP);
-        }
+       rc = gss_svc_upcall_handle_init(req, grctx, gw, target, lustre_svc,
+                                       &rvs_hdl, &in_token);
+       if (rc != SECSVC_OK)
+               RETURN(rc);
 
-        rc = gss_svc_upcall_handle_init(req, grctx, gw, target, lustre_svc,
-                                        &rvs_hdl, &in_token);
-        if (rc != SECSVC_OK)
-                RETURN(rc);
 
        if (grctx->src_ctx->gsc_usr_mds || grctx->src_ctx->gsc_usr_oss ||
            grctx->src_ctx->gsc_usr_root)
-               CDEBUG(D_SEC, "create svc ctx %p: user from %s authenticated as %s\n",
+               CDEBUG(D_SEC,
+                      "%s: create svc ctx %p: user from %s authenticated as %s\n",
+                      target->obd_name,
                       grctx->src_ctx, libcfs_nid2str(req->rq_peer.nid),
                       grctx->src_ctx->gsc_usr_root ? "root" :
                       (grctx->src_ctx->gsc_usr_mds ? "mds" :
-                       (grctx->src_ctx->gsc_usr_oss ? "oss" : "null")));
+                       (grctx->src_ctx->gsc_usr_oss ? "oss" : "null")));
        else
-               CDEBUG(D_SEC, "create svc ctx %p: accept user %u from %s\n",
+               CDEBUG(D_SEC, "%s: create svc ctx %p: accept user %u from %s\n",
+                      target->obd_name,
                       grctx->src_ctx, grctx->src_ctx->gsc_uid,
                       libcfs_nid2str(req->rq_peer.nid));
 
-        if (gw->gw_flags & LUSTRE_GSS_PACK_USER) {
-                if (reqbuf->lm_bufcount < 4) {
-                        CERROR("missing user descriptor\n");
-                        RETURN(SECSVC_DROP);
-                }
-                if (sptlrpc_unpack_user_desc(reqbuf, 2, swabbed)) {
-                        CERROR("Mal-formed user descriptor\n");
-                        RETURN(SECSVC_DROP);
-                }
-
-                req->rq_pack_udesc = 1;
-                req->rq_user_desc = lustre_msg_buf(reqbuf, 2, 0);
-        }
+       if (gw->gw_flags & LUSTRE_GSS_PACK_USER) {
+               if (reqbuf->lm_bufcount < 4) {
+                       rc = SECSVC_DROP;
+                       CDEBUG(D_SEC, "%s: missing user descriptor: rc = %d\n",
+                              target->obd_name, rc);
+                       RETURN(rc);
+               }
+               if (sptlrpc_unpack_user_desc(reqbuf, 2, swabbed)) {
+                       rc = SECSVC_DROP;
+                       CDEBUG(D_SEC,
+                              "%s: Mal-formed user descriptor: rc = %d\n",
+                              target->obd_name, rc);
+                       RETURN(rc);
+               }
+
+               req->rq_pack_udesc = 1;
+               req->rq_user_desc = lustre_msg_buf(reqbuf, 2, 0);
+       }
 
-        req->rq_reqmsg = lustre_msg_buf(reqbuf, 1, 0);
-        req->rq_reqlen = lustre_msg_buflen(reqbuf, 1);
+       req->rq_reqmsg = lustre_msg_buf(reqbuf, 1, 0);
+       req->rq_reqlen = lustre_msg_buflen(reqbuf, 1);
 
-        RETURN(rc);
+       RETURN(rc);
 }
 
 /*
index f1ac344..2f2cccf 100644 (file)
@@ -145,9 +145,10 @@ void deuuidify(char *uuid, const char *prefix, char **uuid_start, int *uuid_len)
                 return;
 
         if (!strncmp(*uuid_start + *uuid_len - strlen(UUID_STR),
-                    UUID_STR, strlen(UUID_STR)))
-                *uuid_len -= strlen(UUID_STR);
+                   UUID_STR, strlen(UUID_STR)))
+               *uuid_len -= strlen(UUID_STR);
 }
+EXPORT_SYMBOL(deuuidify);
 
 /* Must be called with imp_lock held! */
 static void ptlrpc_deactivate_import_nolock(struct obd_import *imp)
index a990e97..88e061a 100644 (file)
@@ -250,8 +250,9 @@ static int send_response(int auth_res, __u64 hash,
                rc = -errno;
                printerr(LL_ERR, "ERROR: %s failed: %s\n",
                         __func__, strerror(-rc));
+       } else {
+               printerr(LL_DEBUG, "response written ok\n");
        }
-       printerr(LL_DEBUG, "response written ok\n");
 
        close(fd);
 out_path:
@@ -1061,6 +1062,7 @@ int handle_channel_request(int fd)
                         lustre_mech);
 
 out_err:
+       printerr(LL_INFO, "to send response with rc=%d\n", rc ? -EACCES : 0);
        /* Failures send a null token */
        rc = send_response(rc, hash, &snd.in_handle, &snd.in_tok,
                           snd.maj_stat, snd.min_stat,