Whamcloud - gitweb
LU-17484 gss: reply error for SEC_CTX_INIT on wrong node 70/53970/6
authorSebastien Buisson <sbuisson@ddn.com>
Thu, 8 Feb 2024 12:44:21 +0000 (13:44 +0100)
committerOleg Drokin <green@whamcloud.com>
Fri, 23 Feb 2024 07:17:19 +0000 (07:17 +0000)
When a server receives a SEC_CTX_INIT request for a target that is not
available (either stopping, or not set up yet, or moved to a failover
node), the request gets dropped. This makes the client-side RPC time
out, increasing the time it takes to establish a proper gss context
with the target, because it slows down the HA mechanism that tries
alternate failover NIDs.
Instead of dropping the request reply for SEC_CTX_INIT, the server
needs to send back a proper error reply. The client will then be able
to immediately try alternate failover NIDs, speeding mount/reconnect
process up, and avoiding potential eviction.

Test-Parameters: trivial
Test-Parameters: kerberos=true testlist=sanity-krb5
Test-Parameters: testgroup=review-dne-selinux-ssk-part-2
Signed-off-by: Sebastien Buisson <sbuisson@ddn.com>
Change-Id: Id2cefaa7d54729a63c7be13b65d7ace579bcaa78
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53970
Reviewed-by: Aurelien Degremont <adegremont@nvidia.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/ldlm/ldlm_lib.c
lustre/ptlrpc/gss/lproc_gss.c
lustre/ptlrpc/gss/sec_gss.c
lustre/ptlrpc/import.c
lustre/utils/gss/svcgssd_proc.c

index c0793b5..b2dd5d5 100644 (file)
@@ -1115,8 +1115,9 @@ int target_handle_connect(struct ptlrpc_request *req)
        if (!target) {
                deuuidify(str, NULL, &target_start, &target_len);
                LCONSOLE_ERROR_MSG(0x137,
-                                  "%s: not available for connect from %s (no target). If you are running an HA pair check that the target is mounted on the other server.\n",
-                                  str, libcfs_nidstr(&req->rq_peer.nid));
+                                  "%.*s: not available for connect from %s (no target). If you are running an HA pair check that the target is mounted on the other server.\n",
+                                  target_len, target_start,
+                                  libcfs_nidstr(&req->rq_peer.nid));
                GOTO(out, rc = -ENODEV);
        }
 
index 1fe6b0d..593b5d1 100644 (file)
@@ -271,6 +271,13 @@ again:
        rc = upcall_cache_downcall(rsicache, param->sid_err,
                                   param->sid_hash, param);
 
+       /* The caller, i.e. the userspace process writing to rsi_info, only
+        * needs to know about invalid values. Other errors are processed
+        * directly in the kernel.
+        */
+       if (rc != -EINVAL)
+               rc = 0;
+
 out:
        if (param != NULL)
                OBD_FREE_LARGE(param, size);
index 5c88883..0c302fb 100644 (file)
@@ -1938,35 +1938,39 @@ int gss_pack_err_notify(struct ptlrpc_request *req, __u32 major, __u32 minor)
 static
 int gss_svc_handle_init(struct ptlrpc_request *req, struct gss_wire_ctx *gw)
 {
-       struct gss_svc_reqctx     *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
-       struct lustre_msg         *reqbuf = req->rq_reqbuf;
-       struct obd_uuid           *uuid;
-       struct obd_device         *target;
-       rawobj_t                   uuid_obj, rvs_hdl, in_token;
-       __u32                      lustre_svc;
-       __u32                     *secdata, seclen;
-       int                        swabbed, rc;
-       ENTRY;
+       struct gss_svc_reqctx *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
+       struct lustre_msg *reqbuf = req->rq_reqbuf;
+       struct obd_uuid *uuid;
+       struct obd_device *target;
+       rawobj_t uuid_obj, rvs_hdl, in_token;
+       __u32 lustre_svc;
+       __u32 *secdata, seclen;
+       int swabbed, rc;
 
+       ENTRY;
        CDEBUG(D_SEC, "processing gss init(%d) request from %s\n", gw->gw_proc,
               libcfs_nidstr(&req->rq_peer.nid));
 
        req->rq_ctx_init = 1;
 
        if (gw->gw_flags & LUSTRE_GSS_PACK_BULK) {
-               CERROR("unexpected bulk flag\n");
-               RETURN(SECSVC_DROP);
+               rc = SECSVC_DROP;
+               CDEBUG(D_SEC, "unexpected bulk flag: rc = %d\n", rc);
+               RETURN(rc);
        }
 
        if (gw->gw_proc == PTLRPC_GSS_PROC_INIT && gw->gw_handle.len != 0) {
-               CERROR("proc %u: invalid handle length %u\n",
-                      gw->gw_proc, gw->gw_handle.len);
-               RETURN(SECSVC_DROP);
+               rc = SECSVC_DROP;
+               CDEBUG(D_SEC, "proc %u: invalid handle length %u: rc = %d\n",
+                      gw->gw_proc, gw->gw_handle.len, rc);
+               RETURN(rc);
        }
 
        if (reqbuf->lm_bufcount < 3 || reqbuf->lm_bufcount > 4) {
-               CERROR("Invalid bufcount %d\n", reqbuf->lm_bufcount);
-               RETURN(SECSVC_DROP);
+               rc = SECSVC_DROP;
+               CDEBUG(D_SEC, "Invalid bufcount %d: rc = %d\n",
+                      reqbuf->lm_bufcount, rc);
+               RETURN(rc);
        }
 
        swabbed = req_capsule_req_need_swab(&req->rq_pill);
@@ -1976,8 +1980,9 @@ int gss_svc_handle_init(struct ptlrpc_request *req, struct gss_wire_ctx *gw)
        seclen = reqbuf->lm_buflens[reqbuf->lm_bufcount - 1];
 
        if (seclen < 4 + 4) {
-               CERROR("sec size %d too small\n", seclen);
-               RETURN(SECSVC_DROP);
+               rc = SECSVC_DROP;
+               CDEBUG(D_SEC, "sec size %d too small: rc = %d\n", seclen, rc);
+               RETURN(rc);
        }
 
        /* lustre svc type */
@@ -1988,30 +1993,48 @@ int gss_svc_handle_init(struct ptlrpc_request *req, struct gss_wire_ctx *gw)
         * because touched internal structure of obd_uuid
         */
        if (rawobj_extract(&uuid_obj, &secdata, &seclen)) {
-               CERROR("failed to extract target uuid\n");
-               RETURN(SECSVC_DROP);
+               rc = SECSVC_DROP;
+               CDEBUG(D_SEC, "failed to extract target uuid: rc = %d\n", rc);
+               RETURN(rc);
        }
        uuid_obj.data[uuid_obj.len - 1] = '\0';
 
        uuid = (struct obd_uuid *) uuid_obj.data;
        target = class_uuid2obd(uuid);
        if (!target || target->obd_stopping || !target->obd_set_up) {
-               CERROR("target '%s' is not available for context init (%s)\n",
-                      uuid->uuid, target == NULL ? "no target" :
-                      (target->obd_stopping ? "stopping" : "not set up"));
-               RETURN(SECSVC_DROP);
+               char *target_start;
+               int target_len;
+
+               if (gss_pack_err_notify(req, GSS_S_NO_CONTEXT, 0) == 0)
+                       rc = SECSVC_COMPLETE;
+               else
+                       rc = SECSVC_DROP;
+
+               deuuidify(uuid->uuid, NULL, &target_start, &target_len);
+               LCONSOLE_ERROR("%.*s: not available for GSS context init from %s (%s).\n",
+                              target_len, target_start,
+                              libcfs_nidstr(&req->rq_peer.nid),
+                              target ?
+                              (target->obd_stopping ?
+                               "stopping" : "not set up") :
+                              "no target");
+               RETURN(rc);
        }
 
        /* extract reverse handle */
        if (rawobj_extract(&rvs_hdl, &secdata, &seclen)) {
-               CERROR("failed extract reverse handle\n");
-               RETURN(SECSVC_DROP);
+               rc = SECSVC_DROP;
+               CDEBUG(D_SEC, "%s: failed extract reverse handle: rc = %d\n",
+                      target->obd_name, rc);
+               RETURN(rc);
        }
 
        /* extract token */
        if (rawobj_extract(&in_token, &secdata, &seclen)) {
-               CERROR("can't extract token\n");
-               RETURN(SECSVC_DROP);
+               rc = SECSVC_DROP;
+               CDEBUG(D_SEC, "%s: can't extract token: rc = %d\n",
+                      target->obd_name, rc);
+               RETURN(rc);
        }
 
        rc = gss_svc_upcall_handle_init(req, grctx, gw, target, lustre_svc,
@@ -2021,24 +2044,32 @@ int gss_svc_handle_init(struct ptlrpc_request *req, struct gss_wire_ctx *gw)
 
        if (grctx->src_ctx->gsc_usr_mds || grctx->src_ctx->gsc_usr_oss ||
            grctx->src_ctx->gsc_usr_root)
-               CDEBUG(D_SEC, "create svc ctx %p: user from %s authenticated as %s\n",
+               CDEBUG(D_SEC,
+                      "%s: create svc ctx %p: user from %s authenticated as %s\n",
+                      target->obd_name,
                       grctx->src_ctx, libcfs_nidstr(&req->rq_peer.nid),
                       grctx->src_ctx->gsc_usr_root ? "root" :
                       (grctx->src_ctx->gsc_usr_mds ? "mds" :
                        (grctx->src_ctx->gsc_usr_oss ? "oss" : "null")));
        else
-               CDEBUG(D_SEC, "create svc ctx %p: accept user %u from %s\n",
+               CDEBUG(D_SEC, "%s: create svc ctx %p: accept user %u from %s\n",
+                      target->obd_name,
                       grctx->src_ctx, grctx->src_ctx->gsc_uid,
                       libcfs_nidstr(&req->rq_peer.nid));
 
        if (gw->gw_flags & LUSTRE_GSS_PACK_USER) {
                if (reqbuf->lm_bufcount < 4) {
-                       CERROR("missing user descriptor\n");
-                       RETURN(SECSVC_DROP);
+                       rc = SECSVC_DROP;
+                       CDEBUG(D_SEC, "%s: missing user descriptor: rc = %d\n",
+                              target->obd_name, rc);
+                       RETURN(rc);
                }
                if (sptlrpc_unpack_user_desc(reqbuf, 2, swabbed)) {
-                       CERROR("Mal-formed user descriptor\n");
-                       RETURN(SECSVC_DROP);
+                       rc = SECSVC_DROP;
+                       CDEBUG(D_SEC,
+                              "%s: Mal-formed user descriptor: rc = %d\n",
+                              target->obd_name, rc);
+                       RETURN(rc);
                }
 
                req->rq_pack_udesc = 1;
index cb27a1e..c8432d1 100644 (file)
@@ -148,6 +148,7 @@ void deuuidify(char *uuid, const char *prefix, char **uuid_start, int *uuid_len)
                    UUID_STR, strlen(UUID_STR)))
                *uuid_len -= strlen(UUID_STR);
 }
+EXPORT_SYMBOL(deuuidify);
 
 /* Must be called with imp_lock held! */
 static void ptlrpc_deactivate_import_nolock(struct obd_import *imp)
index 3e6f001..517683e 100644 (file)
@@ -250,8 +250,9 @@ static int send_response(int auth_res, __u64 hash,
                rc = -errno;
                printerr(LL_ERR, "ERROR: %s failed: %s\n",
                         __func__, strerror(-rc));
+       } else {
+               printerr(LL_DEBUG, "response written ok\n");
        }
-       printerr(LL_DEBUG, "response written ok\n");
 
        close(fd);
 out_path:
@@ -1069,6 +1070,7 @@ int handle_channel_request(int fd)
                         lustre_mech);
 
 out_err:
+       printerr(LL_INFO, "to send response with rc=%d\n", rc ? -EACCES : 0);
        /* Failures send a null token */
        rc = send_response(rc, hash, &snd.in_handle, &snd.in_tok,
                           snd.maj_stat, snd.min_stat,