From 10f98eb8a95a9efc7a123b69d2f05cf0f8ac2fb7 Mon Sep 17 00:00:00 2001 From: Sebastien Buisson Date: Thu, 8 Feb 2024 13:44:21 +0100 Subject: [PATCH] LU-17484 gss: reply error for SEC_CTX_INIT on wrong node When a server receives a SEC_CTX_INIT request for a target that is not available (either stopping, or not set up yet, or moved to a failover node), the request gets dropped. This makes the client-side RPC time out, increasing the time it takes to establish a proper gss context with the target, because it slows down the HA mechanism that tries alternate failover NIDs. Instead of dropping the request reply for SEC_CTX_INIT, the server needs to send back a proper error reply. The client will then be able to immediately try alternate failover NIDs, speeding mount/reconnect process up, and avoiding potential eviction. Lustre-change: https://review.whamcloud.com/53970 Lustre-commit: 3d635dd3f24421c181aca5673cd81ed8f3e2c622 Test-Parameters: trivial Test-Parameters: kerberos=true testlist=sanity-krb5 Test-Parameters: testgroup=review-dne-selinux-ssk-part-2 Signed-off-by: Sebastien Buisson Change-Id: Id2cefaa7d54729a63c7be13b65d7ace579bcaa78 Reviewed-by: Andreas Dilger Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/54157 Tested-by: jenkins Tested-by: Maloo --- lustre/ldlm/ldlm_lib.c | 5 +- lustre/ptlrpc/gss/lproc_gss.c | 7 ++ lustre/ptlrpc/gss/sec_gss.c | 205 +++++++++++++++++++++++----------------- lustre/ptlrpc/import.c | 5 +- lustre/utils/gss/svcgssd_proc.c | 4 +- 5 files changed, 134 insertions(+), 92 deletions(-) diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index b89b5c1..027e7bf 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1134,8 +1134,9 @@ int target_handle_connect(struct ptlrpc_request *req) if (!target) { deuuidify(str, NULL, &target_start, &target_len); LCONSOLE_ERROR_MSG(0x137, - "%s: not available for connect from %s (no target). If you are running an HA pair check that the target is mounted on the other server.\n", - str, libcfs_nid2str(req->rq_peer.nid)); + "%.*s: not available for connect from %s (no target). If you are running an HA pair check that the target is mounted on the other server.\n", + target_len, target_start, + libcfs_nid2str(req->rq_peer.nid)); GOTO(out, rc = -ENODEV); } diff --git a/lustre/ptlrpc/gss/lproc_gss.c b/lustre/ptlrpc/gss/lproc_gss.c index ed29f3e..0fbd032 100644 --- a/lustre/ptlrpc/gss/lproc_gss.c +++ b/lustre/ptlrpc/gss/lproc_gss.c @@ -269,6 +269,13 @@ again: rc = upcall_cache_downcall(rsicache, param->sid_err, param->sid_hash, param); + /* The caller, i.e. the userspace process writing to rsi_info, only + * needs to know about invalid values. Other errors are processed + * directly in the kernel. + */ + if (rc != -EINVAL) + rc = 0; + out: if (param != NULL) OBD_FREE_LARGE(param, size); diff --git a/lustre/ptlrpc/gss/sec_gss.c b/lustre/ptlrpc/gss/sec_gss.c index e35a0fb..489646f 100644 --- a/lustre/ptlrpc/gss/sec_gss.c +++ b/lustre/ptlrpc/gss/sec_gss.c @@ -1977,120 +1977,151 @@ int gss_pack_err_notify(struct ptlrpc_request *req, __u32 major, __u32 minor) RETURN(0); } -static -int gss_svc_handle_init(struct ptlrpc_request *req, - struct gss_wire_ctx *gw) -{ - struct gss_svc_reqctx *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx); - struct lustre_msg *reqbuf = req->rq_reqbuf; - struct obd_uuid *uuid; - struct obd_device *target; - rawobj_t uuid_obj, rvs_hdl, in_token; - __u32 lustre_svc; - __u32 *secdata, seclen; - int swabbed, rc; - ENTRY; +static int gss_svc_handle_init(struct ptlrpc_request *req, + struct gss_wire_ctx *gw) +{ + struct gss_svc_reqctx *grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx); + struct lustre_msg *reqbuf = req->rq_reqbuf; + struct obd_uuid *uuid; + struct obd_device *target; + rawobj_t uuid_obj, rvs_hdl, in_token; + __u32 lustre_svc; + __u32 *secdata, seclen; + int swabbed, rc; - CDEBUG(D_SEC, "processing gss init(%d) request from %s\n", gw->gw_proc, - libcfs_nid2str(req->rq_peer.nid)); + ENTRY; + CDEBUG(D_SEC, "processing gss init(%d) request from %s\n", gw->gw_proc, + libcfs_nid2str(req->rq_peer.nid)); - req->rq_ctx_init = 1; + req->rq_ctx_init = 1; - if (gw->gw_flags & LUSTRE_GSS_PACK_BULK) { - CERROR("unexpected bulk flag\n"); - RETURN(SECSVC_DROP); - } + if (gw->gw_flags & LUSTRE_GSS_PACK_BULK) { + rc = SECSVC_DROP; + CDEBUG(D_SEC, "unexpected bulk flag: rc = %d\n", rc); + RETURN(rc); + } - if (gw->gw_proc == PTLRPC_GSS_PROC_INIT && gw->gw_handle.len != 0) { - CERROR("proc %u: invalid handle length %u\n", - gw->gw_proc, gw->gw_handle.len); - RETURN(SECSVC_DROP); - } + if (gw->gw_proc == PTLRPC_GSS_PROC_INIT && gw->gw_handle.len != 0) { + rc = SECSVC_DROP; + CDEBUG(D_SEC, "proc %u: invalid handle length %u: rc = %d\n", + gw->gw_proc, gw->gw_handle.len, rc); + RETURN(rc); + } - if (reqbuf->lm_bufcount < 3 || reqbuf->lm_bufcount > 4){ - CERROR("Invalid bufcount %d\n", reqbuf->lm_bufcount); - RETURN(SECSVC_DROP); - } + if (reqbuf->lm_bufcount < 3 || reqbuf->lm_bufcount > 4){ + rc = SECSVC_DROP; + CDEBUG(D_SEC, "Invalid bufcount %d: rc = %d\n", + reqbuf->lm_bufcount, rc); + RETURN(rc); + } swabbed = req_capsule_req_need_swab(&req->rq_pill); - /* ctx initiate payload is in last segment */ - secdata = lustre_msg_buf(reqbuf, reqbuf->lm_bufcount - 1, 0); - seclen = reqbuf->lm_buflens[reqbuf->lm_bufcount - 1]; + /* ctx initiate payload is in last segment */ + secdata = lustre_msg_buf(reqbuf, reqbuf->lm_bufcount - 1, 0); + seclen = reqbuf->lm_buflens[reqbuf->lm_bufcount - 1]; - if (seclen < 4 + 4) { - CERROR("sec size %d too small\n", seclen); - RETURN(SECSVC_DROP); - } + if (seclen < 4 + 4) { + rc = SECSVC_DROP; + CDEBUG(D_SEC, "sec size %d too small: rc = %d\n", seclen, rc); + RETURN(rc); + } - /* lustre svc type */ - lustre_svc = le32_to_cpu(*secdata++); - seclen -= 4; + /* lustre svc type */ + lustre_svc = le32_to_cpu(*secdata++); + seclen -= 4; - /* extract target uuid, note this code is somewhat fragile - * because touched internal structure of obd_uuid */ - if (rawobj_extract(&uuid_obj, &secdata, &seclen)) { - CERROR("failed to extract target uuid\n"); - RETURN(SECSVC_DROP); - } - uuid_obj.data[uuid_obj.len - 1] = '\0'; + /* extract target uuid, note this code is somewhat fragile + * because touched internal structure of obd_uuid */ + if (rawobj_extract(&uuid_obj, &secdata, &seclen)) { + rc = SECSVC_DROP; + CDEBUG(D_SEC, "failed to extract target uuid: rc = %d\n", rc); + RETURN(rc); + } + uuid_obj.data[uuid_obj.len - 1] = '\0'; + + uuid = (struct obd_uuid *) uuid_obj.data; + target = class_uuid2obd(uuid); + if (!target || target->obd_stopping || !target->obd_set_up) { + char *target_start; + int target_len; + + if (gss_pack_err_notify(req, GSS_S_NO_CONTEXT, 0) == 0) + rc = SECSVC_COMPLETE; + else + rc = SECSVC_DROP; + + deuuidify(uuid->uuid, NULL, &target_start, &target_len); + LCONSOLE_ERROR("%.*s: not available for GSS context init from %s (%s).\n", + target_len, target_start, + libcfs_nid2str(req->rq_peer.nid), + target ? + (target->obd_stopping ? + "stopping" : "not set up") : + "no target"); + RETURN(rc); + } - uuid = (struct obd_uuid *) uuid_obj.data; - target = class_uuid2obd(uuid); - if (!target || target->obd_stopping || !target->obd_set_up) { - CERROR("target '%s' is not available for context init (%s)\n", - uuid->uuid, target == NULL ? "no target" : - (target->obd_stopping ? "stopping" : "not set up")); - RETURN(SECSVC_DROP); - } + /* extract reverse handle */ + if (rawobj_extract(&rvs_hdl, &secdata, &seclen)) { + rc = SECSVC_DROP; + CDEBUG(D_SEC, "%s: failed extract reverse handle: rc = %d\n", + target->obd_name, rc); + RETURN(rc); + } - /* extract reverse handle */ - if (rawobj_extract(&rvs_hdl, &secdata, &seclen)) { - CERROR("failed extract reverse handle\n"); - RETURN(SECSVC_DROP); - } + /* extract token */ + if (rawobj_extract(&in_token, &secdata, &seclen)) { + rc = SECSVC_DROP; + CDEBUG(D_SEC, "%s: can't extract token: rc = %d\n", + target->obd_name, rc); + RETURN(rc); + } - /* extract token */ - if (rawobj_extract(&in_token, &secdata, &seclen)) { - CERROR("can't extract token\n"); - RETURN(SECSVC_DROP); - } + rc = gss_svc_upcall_handle_init(req, grctx, gw, target, lustre_svc, + &rvs_hdl, &in_token); + if (rc != SECSVC_OK) + RETURN(rc); - rc = gss_svc_upcall_handle_init(req, grctx, gw, target, lustre_svc, - &rvs_hdl, &in_token); - if (rc != SECSVC_OK) - RETURN(rc); if (grctx->src_ctx->gsc_usr_mds || grctx->src_ctx->gsc_usr_oss || grctx->src_ctx->gsc_usr_root) - CDEBUG(D_SEC, "create svc ctx %p: user from %s authenticated as %s\n", + CDEBUG(D_SEC, + "%s: create svc ctx %p: user from %s authenticated as %s\n", + target->obd_name, grctx->src_ctx, libcfs_nid2str(req->rq_peer.nid), grctx->src_ctx->gsc_usr_root ? "root" : (grctx->src_ctx->gsc_usr_mds ? "mds" : - (grctx->src_ctx->gsc_usr_oss ? "oss" : "null"))); + (grctx->src_ctx->gsc_usr_oss ? "oss" : "null"))); else - CDEBUG(D_SEC, "create svc ctx %p: accept user %u from %s\n", + CDEBUG(D_SEC, "%s: create svc ctx %p: accept user %u from %s\n", + target->obd_name, grctx->src_ctx, grctx->src_ctx->gsc_uid, libcfs_nid2str(req->rq_peer.nid)); - if (gw->gw_flags & LUSTRE_GSS_PACK_USER) { - if (reqbuf->lm_bufcount < 4) { - CERROR("missing user descriptor\n"); - RETURN(SECSVC_DROP); - } - if (sptlrpc_unpack_user_desc(reqbuf, 2, swabbed)) { - CERROR("Mal-formed user descriptor\n"); - RETURN(SECSVC_DROP); - } - - req->rq_pack_udesc = 1; - req->rq_user_desc = lustre_msg_buf(reqbuf, 2, 0); - } + if (gw->gw_flags & LUSTRE_GSS_PACK_USER) { + if (reqbuf->lm_bufcount < 4) { + rc = SECSVC_DROP; + CDEBUG(D_SEC, "%s: missing user descriptor: rc = %d\n", + target->obd_name, rc); + RETURN(rc); + } + if (sptlrpc_unpack_user_desc(reqbuf, 2, swabbed)) { + rc = SECSVC_DROP; + CDEBUG(D_SEC, + "%s: Mal-formed user descriptor: rc = %d\n", + target->obd_name, rc); + RETURN(rc); + } + + req->rq_pack_udesc = 1; + req->rq_user_desc = lustre_msg_buf(reqbuf, 2, 0); + } - req->rq_reqmsg = lustre_msg_buf(reqbuf, 1, 0); - req->rq_reqlen = lustre_msg_buflen(reqbuf, 1); + req->rq_reqmsg = lustre_msg_buf(reqbuf, 1, 0); + req->rq_reqlen = lustre_msg_buflen(reqbuf, 1); - RETURN(rc); + RETURN(rc); } /* diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index f1ac344..2f2cccf 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -145,9 +145,10 @@ void deuuidify(char *uuid, const char *prefix, char **uuid_start, int *uuid_len) return; if (!strncmp(*uuid_start + *uuid_len - strlen(UUID_STR), - UUID_STR, strlen(UUID_STR))) - *uuid_len -= strlen(UUID_STR); + UUID_STR, strlen(UUID_STR))) + *uuid_len -= strlen(UUID_STR); } +EXPORT_SYMBOL(deuuidify); /* Must be called with imp_lock held! */ static void ptlrpc_deactivate_import_nolock(struct obd_import *imp) diff --git a/lustre/utils/gss/svcgssd_proc.c b/lustre/utils/gss/svcgssd_proc.c index a990e97..88e061a 100644 --- a/lustre/utils/gss/svcgssd_proc.c +++ b/lustre/utils/gss/svcgssd_proc.c @@ -250,8 +250,9 @@ static int send_response(int auth_res, __u64 hash, rc = -errno; printerr(LL_ERR, "ERROR: %s failed: %s\n", __func__, strerror(-rc)); + } else { + printerr(LL_DEBUG, "response written ok\n"); } - printerr(LL_DEBUG, "response written ok\n"); close(fd); out_path: @@ -1061,6 +1062,7 @@ int handle_channel_request(int fd) lustre_mech); out_err: + printerr(LL_INFO, "to send response with rc=%d\n", rc ? -EACCES : 0); /* Failures send a null token */ rc = send_response(rc, hash, &snd.in_handle, &snd.in_tok, snd.maj_stat, snd.min_stat, -- 1.8.3.1