From: Sebastien Buisson Date: Thu, 16 May 2024 09:58:24 +0000 (+0200) Subject: LU-17852 gss: do not use expired reverse gss contexts X-Git-Tag: 2.15.64~22 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=refs%2Fchanges%2F27%2F55127%2F9;p=fs%2Flustre-release.git LU-17852 gss: do not use expired reverse gss contexts On server side, a reverse context matches a gss context established on client side. These reverse contexts have a expiration time, and are replaced with fresh ones when they expire. So get rid of expired reverse contexts when we find them in the gsk_clist. And when we look up for a context, do not continue using the current one if it is expired. Add sanity-krb5 test_200 to check the expired reverse contexts. Test-Parameters: kerberos=true testlist=sanity-krb5 Signed-off-by: Sebastien Buisson Change-Id: I11f2d8ab298073f9d5bedff187b67f2ca289ae47 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55127 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Aurelien Degremont Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- diff --git a/lustre/ptlrpc/gss/gss_keyring.c b/lustre/ptlrpc/gss/gss_keyring.c index 9894825..c1d61655 100644 --- a/lustre/ptlrpc/gss/gss_keyring.c +++ b/lustre/ptlrpc/gss/gss_keyring.c @@ -486,7 +486,11 @@ struct ptlrpc_cli_ctx * sec_lookup_root_ctx_kr(struct ptlrpc_sec *sec) ctx = gsec_kr->gsk_root_ctx; - if (ctx == NULL && unlikely(sec_is_reverse(sec))) { + /* Need to find valid rev ctx if we do not have one yet, + * or if it is expired. + */ + if (unlikely(sec_is_reverse(sec)) && + (ctx == NULL || ctx->cc_expire < now)) { struct ptlrpc_cli_ctx *tmp; /* For reverse context, browse list and pick the one with @@ -527,6 +531,7 @@ void rvs_sec_install_root_ctx_kr(struct ptlrpc_sec *sec, { struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec); struct ptlrpc_cli_ctx *ctx; + struct hlist_node *next; time64_t now; ENTRY; @@ -536,22 +541,31 @@ void rvs_sec_install_root_ctx_kr(struct ptlrpc_sec *sec, now = ktime_get_real_seconds(); - /* set all existing ctxs short expiry */ - hlist_for_each_entry(ctx, &gsec_kr->gsk_clist, cc_cache) { - if (ctx->cc_expire > now + RVS_CTX_EXPIRE_NICE) { - ctx->cc_early_expire = 1; - ctx->cc_expire = now + RVS_CTX_EXPIRE_NICE; - } - } + /* set all existing ctxs short expiry */ + hlist_for_each_entry_safe(ctx, next, &gsec_kr->gsk_clist, cc_cache) { + if (ctx->cc_expire > now + RVS_CTX_EXPIRE_NICE) { + ctx->cc_early_expire = 1; + ctx->cc_expire = now + RVS_CTX_EXPIRE_NICE; + } else if (ctx != gsec_kr->gsk_root_ctx && + ctx->cc_expire < now) { + /* unlist expired context to remove it from gsk_clist */ + if (ctx_unlist_kr(ctx, 1)) { + /* release unlisted ctx to destroy it */ + set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags); + ctx_release_kr(ctx, 1); + } + } + } - /* if there's root_ctx there, instead obsolete the current - * immediately, we leave it continue operating for a little while. - * hopefully when the first backward rpc with newest ctx send out, - * the client side already have the peer ctx well established. */ - ctx_enlist_kr(new_ctx, gsec_kr->gsk_root_ctx ? 0 : 1, 1); + /* If there's root_ctx there, instead obsolete the current + * immediately, we leave it continue operating for a little while. + * hopefully when the first backward rpc with newest ctx send out, + * the client side already have the peer ctx well established. + */ + ctx_enlist_kr(new_ctx, gsec_kr->gsk_root_ctx ? 0 : 1, 1); - if (key) - bind_key_ctx(key, new_ctx); + if (key) + bind_key_ctx(key, new_ctx); spin_unlock(&sec->ps_lock); } @@ -1202,10 +1216,11 @@ int gss_sec_flush_ctx_cache_kr(struct ptlrpc_sec *sec, static void gss_sec_gc_ctx_kr(struct ptlrpc_sec *sec) { - struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec); - struct hlist_head freelist = HLIST_HEAD_INIT; + struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec); + struct hlist_head freelist = HLIST_HEAD_INIT; + struct ptlrpc_cli_ctx *ctx; + struct gss_cli_ctx *gctx; struct hlist_node *next; - struct ptlrpc_cli_ctx *ctx; ENTRY; CDEBUG(D_SEC, "running gc\n"); @@ -1218,8 +1233,13 @@ void gss_sec_gc_ctx_kr(struct ptlrpc_sec *sec) atomic_inc(&ctx->cc_refcount); if (cli_ctx_check_death(ctx) && ctx_unlist_kr(ctx, 1)) { + gctx = ctx2gctx(ctx); + hlist_add_head(&ctx->cc_cache, &freelist); - CWARN("unhashed ctx %p\n", ctx); + CWARN("%s: cleaning gss ctx hdl %#llx:%#llx\n", + ctx->cc_sec->ps_import->imp_obd->obd_name, + gss_handle_to_u64(&gctx->gc_handle), + gss_handle_to_u64(&gctx->gc_svc_handle)); } else { LASSERT(atomic_read(&ctx->cc_refcount) >= 2); atomic_dec(&ctx->cc_refcount); diff --git a/lustre/ptlrpc/gss/gss_svc_upcall.c b/lustre/ptlrpc/gss/gss_svc_upcall.c index ff04f8e..85b1ab6 100644 --- a/lustre/ptlrpc/gss/gss_svc_upcall.c +++ b/lustre/ptlrpc/gss/gss_svc_upcall.c @@ -961,7 +961,7 @@ struct gss_svc_ctx *gss_svc_upcall_get_ctx(struct ptlrpc_request *req, rscp = gss_svc_searchbyctx(&gw->gw_handle); if (IS_ERR_OR_NULL(rscp)) { - CWARN("Invalid gss ctx idx %#llx from %s\n", + CWARN("Invalid gss ctx hdl %#llx from %s\n", gss_handle_to_u64(&gw->gw_handle), libcfs_nidstr(&req->rq_peer.nid)); return NULL; diff --git a/lustre/ptlrpc/gss/sec_gss.c b/lustre/ptlrpc/gss/sec_gss.c index a09d789..8e28b5c 100644 --- a/lustre/ptlrpc/gss/sec_gss.c +++ b/lustre/ptlrpc/gss/sec_gss.c @@ -661,13 +661,15 @@ int gss_cli_ctx_handle_err_notify(struct ptlrpc_cli_ctx *ctx, struct gss_header *ghdr) { struct gss_err_header *errhdr; + struct gss_cli_ctx *gctx; int rc; LASSERT(ghdr->gh_proc == PTLRPC_GSS_PROC_ERR); errhdr = (struct gss_err_header *) ghdr; - CWARN("%s: req x%llu/t%llu, ctx %p idx %#llx(%u->%s): %sserver respond (%08x/%08x)\n", + CDEBUG(D_SEC, + "%s: req x%llu/t%llu, ctx %p idx %#llx(%u->%s): %sserver respond (%08x/%08x)\n", ctx->cc_sec->ps_import->imp_obd->obd_name, req->rq_xid, req->rq_transno, ctx, gss_handle_to_u64(&ctx2gctx(ctx)->gc_handle), @@ -708,10 +710,16 @@ int gss_cli_ctx_handle_err_notify(struct ptlrpc_cli_ctx *ctx, * if we add new mechanism, make sure the correct error are * returned in this case. */ - CWARN("%s: %s might have lost the context (%s), retrying\n", + gctx = ctx2gctx(ctx); + CWARN("%s: %s context (hdl %#llx:%#llx) from %s %s (uid %u), retrying\n", ctx->cc_sec->ps_import->imp_obd->obd_name, + errhdr->gh_major == GSS_S_NO_CONTEXT ? + "unknown" : "wrong signature for", + gss_handle_to_u64(&gctx->gc_handle), + gss_handle_to_u64(&gctx->gc_svc_handle), sec_is_reverse(ctx->cc_sec) ? "client" : "server", - errhdr->gh_major == GSS_S_NO_CONTEXT ? "NO_CONTEXT" : "BAD_SIG"); + req->rq_import ? obd_import_nid2str(req->rq_import) : "0@<0:0>", + ctx->cc_vcred.vc_uid); sptlrpc_cli_ctx_expire(ctx); diff --git a/lustre/tests/sanity-krb5.sh b/lustre/tests/sanity-krb5.sh index 4ac0ee6..8b6cdb2 100755 --- a/lustre/tests/sanity-krb5.sh +++ b/lustre/tests/sanity-krb5.sh @@ -869,6 +869,54 @@ test_151() { } run_test 151 "secure mgs connection: server flavor control" +test_200() { + local nid=$(lctl list_nids | grep ${NETTYPE} | head -n1) + local nidstr="peer_nid: ${nid}," + local count + + lfs df -h + do_facet $SINGLEMDS $LCTL get_param -n \ + mdt.*-MDT0000.gss.srpc_serverctx | grep "$nidstr" + count=$(do_facet $SINGLEMDS $LCTL get_param -n \ + mdt.*-MDT0000.gss.srpc_serverctx | grep "$nidstr" | + grep -c 'delta: -') + echo "found $count expired reverse contexts (1)" + # We can have up to 3 expired contexts in the normal case: + # - the newest one, that is just about to be renewed + # - the previous one that had expired + # - the one currently referenced in the sec, not updated in the absence + # of client activity. + (( count < 4 )) || error "expired reverse contexts should be <= 3 (1)" + + # unmount to get rid of old context + umount_client $MOUNT || error "umount $MOUNT failed" + kdestroy + stack_trap "mount_client $MOUNT ${MOUNT_OPTS} || true" EXIT + if is_mounted $MOUNT2; then + umount_client $MOUNT2 || error "umount $MOUNT2 failed" + stack_trap "mount_client $MOUNT2 ${MOUNT_OPTS}" EXIT + fi + + # update ticket lifetime to be 45s + stack_trap "/usr/bin/cp -f /etc/krb5.conf.bkp /etc/krb5.conf" EXIT + sed -i.bkp s+[^#]ticket_lifetime.*+ticket_lifetime\ =\ 45s+ \ + /etc/krb5.conf + # establish new context, and wait 3x lifetime + mount_client $MOUNT ${MOUNT_OPTS} || error "remount failed" + lfs df -h + sleep 135 + # re-activate connections, and look for reverse contexts on server side + lfs df -h + do_facet $SINGLEMDS $LCTL get_param -n \ + mdt.*-MDT0000.gss.srpc_serverctx | grep "$nidstr" + count=$(do_facet $SINGLEMDS $LCTL get_param -n \ + mdt.*-MDT0000.gss.srpc_serverctx | grep "$nidstr" | + grep -c 'delta: -') + echo "found $count expired reverse contexts (2)" + (( count < 4 )) || error "expired reverse contexts should be <= 3 (2)" +} +run_test 200 "check expired reverse gss contexts" + complete_test $SECONDS set_flavor_all null cleanup_gss