From d0194a4b5f6efa26d5473c2793b525f5fdb77e67 Mon Sep 17 00:00:00 2001 From: Sebastien Buisson Date: Fri, 13 Oct 2023 17:19:16 +0200 Subject: [PATCH] LU-17015 gss: avoid request replay Lustre's upcall cache has a retry mechanism in case the upcall was interrupted or failed and we timed out waiting. In this case we do our best to retry and do the upcall again. But when the upcall cache is used for GSS contexts, the upcall cannot be done twice with same data. The GSSAPI implements security measures that forbids that kind of request replay, to prevent man-in-the-middle attacks for instance. Add a new uc_acquire_replay field to struct upcall_cache, so that upcall cache users can tell if acquire upcall can be replayed. For identity upcall, this replay is fine. But for GSS contexts we need to avoid those replays. And bump upcall cache timeout value from 20s to 30s for GSS context init requests. Also add more debug messages to gss code for both client and server sides, and both kernel and userspace. Test-Parameters: kerberos=true testlist=sanity-krb5 Signed-off-by: Sebastien Buisson Change-Id: I56decc83a4f0d21be420e87cb0417826011932af Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52689 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Aurelien Degremont Reviewed-by: Oleg Drokin --- lustre/include/upcall_cache.h | 3 ++- lustre/mdt/mdt_handler.c | 1 + lustre/obdclass/upcall_cache.c | 12 ++++++++++-- lustre/ptlrpc/gss/gss_cli_upcall.c | 3 +++ lustre/ptlrpc/gss/gss_svc_upcall.c | 4 +++- lustre/utils/gss/lgss_keyring.c | 6 ++++++ lustre/utils/gss/lgss_utils.c | 39 ++++++++++++++++++++++++++++++++++++++ lustre/utils/gss/lgss_utils.h | 3 +++ lustre/utils/gss/svcgssd_proc.c | 4 ++-- 9 files changed, 69 insertions(+), 6 deletions(-) diff --git a/lustre/include/upcall_cache.h b/lustre/include/upcall_cache.h index 3530eda..84816b3 100644 --- a/lustre/include/upcall_cache.h +++ b/lustre/include/upcall_cache.h @@ -144,6 +144,7 @@ struct upcall_cache { char uc_name[40]; /* for upcall */ char uc_upcall[UC_CACHE_UPCALL_MAXPATH]; + bool uc_acquire_replay; time64_t uc_acquire_expire; /* seconds */ time64_t uc_entry_expire; /* seconds */ struct upcall_cache_ops *uc_ops; @@ -174,7 +175,7 @@ static inline void upcall_cache_flush_all(struct upcall_cache *cache) void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args); struct upcall_cache *upcall_cache_init(const char *name, const char *upcall, int hashsz, time64_t entry_expire, - time64_t acquire_expire, + time64_t acquire_expire, bool replayable, struct upcall_cache_ops *ops); void upcall_cache_cleanup(struct upcall_cache *cache); diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 8547167..6d4fe1e 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -6345,6 +6345,7 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, UC_IDCACHE_HASH_SIZE, 1200, /* entry expire: 20 mn */ 30, /* acquire expire: 30 s */ + true, /* acquire can replay */ &mdt_identity_upcall_cache_ops); if (IS_ERR(m->mdt_identity_cache)) { rc = PTR_ERR(m->mdt_identity_cache); diff --git a/lustre/obdclass/upcall_cache.c b/lustre/obdclass/upcall_cache.c index ddd4d52..cdf2e44 100644 --- a/lustre/obdclass/upcall_cache.c +++ b/lustre/obdclass/upcall_cache.c @@ -52,6 +52,8 @@ static struct upcall_cache_entry *alloc_entry(struct upcall_cache *cache, entry->ue_key = key; atomic_set(&entry->ue_refcount, 0); init_waitqueue_head(&entry->ue_waitq); + entry->ue_acquire_expire = 0; + entry->ue_expire = 0; if (cache->uc_ops->init_entry) cache->uc_ops->init_entry(entry, args); return entry; @@ -230,6 +232,11 @@ find_again: if (UC_CACHE_IS_ACQUIRING(entry)) { /* we're interrupted or upcall failed in the middle */ rc = left > 0 ? -EINTR : -ETIMEDOUT; + /* if we waited uc_acquire_expire, we can try again + * with same data, but only if acquire is replayable + */ + if (left <= 0 && !cache->uc_acquire_replay) + failedacquiring = true; put_entry(cache, entry); if (!failedacquiring) { spin_unlock(&cache->uc_lock); @@ -348,7 +355,7 @@ int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key, if (err) { CDEBUG(D_OTHER, "%s: upcall for key %llu returned %d\n", cache->uc_name, entry->ue_key, err); - GOTO(out, rc = -EINVAL); + GOTO(out, rc = err); } if (!UC_CACHE_IS_ACQUIRING(entry)) { @@ -447,7 +454,7 @@ EXPORT_SYMBOL(upcall_cache_flush_one); struct upcall_cache *upcall_cache_init(const char *name, const char *upcall, int hashsz, time64_t entry_expire, - time64_t acquire_expire, + time64_t acquire_expire, bool replayable, struct upcall_cache_ops *ops) { struct upcall_cache *cache; @@ -472,6 +479,7 @@ struct upcall_cache *upcall_cache_init(const char *name, const char *upcall, strlcpy(cache->uc_upcall, upcall, sizeof(cache->uc_upcall)); cache->uc_entry_expire = entry_expire; cache->uc_acquire_expire = acquire_expire; + cache->uc_acquire_replay = replayable; cache->uc_ops = ops; RETURN(cache); diff --git a/lustre/ptlrpc/gss/gss_cli_upcall.c b/lustre/ptlrpc/gss/gss_cli_upcall.c index 18dabd6..3e901e5 100644 --- a/lustre/ptlrpc/gss/gss_cli_upcall.c +++ b/lustre/ptlrpc/gss/gss_cli_upcall.c @@ -336,6 +336,9 @@ int gss_do_ctx_init_rpc(char __user *buffer, unsigned long count) param.status = rc; if (rc != -EACCES) param.status = -ETIMEDOUT; + CDEBUG(D_SEC, + "%s: ctx init req got %d, returning to userspace status %llu\n", + obd->obd_name, rc, param.status); goto out_copy; } diff --git a/lustre/ptlrpc/gss/gss_svc_upcall.c b/lustre/ptlrpc/gss/gss_svc_upcall.c index 9adcfdc..5894674 100644 --- a/lustre/ptlrpc/gss/gss_svc_upcall.c +++ b/lustre/ptlrpc/gss/gss_svc_upcall.c @@ -1720,7 +1720,8 @@ int __init gss_init_svc_upcall(void) rsicache = upcall_cache_init(RSI_CACHE_NAME, RSI_UPCALL_PATH, UC_RSICACHE_HASH_SIZE, 3600, /* entry expire: 1 h */ - 20, /* acquire expire: 20 s */ + 30, /* acquire expire: 30 s */ + false, /* can't replay acquire */ &rsi_upcall_cache_ops); if (IS_ERR(rsicache)) { rc = PTR_ERR(rsicache); @@ -1731,6 +1732,7 @@ int __init gss_init_svc_upcall(void) UC_RSCCACHE_HASH_SIZE, 3600, /* replaced with one from mech */ 100, /* arbitrary, not used */ + false, /* can't replay acquire */ &rsc_upcall_cache_ops); if (IS_ERR(rsccache)) { upcall_cache_cleanup(rsicache); diff --git a/lustre/utils/gss/lgss_keyring.c b/lustre/utils/gss/lgss_keyring.c index e0bb94b..11f12f6 100644 --- a/lustre/utils/gss/lgss_keyring.c +++ b/lustre/utils/gss/lgss_keyring.c @@ -393,6 +393,12 @@ static int lgssc_negotiation(struct lgss_nego_data *lnd, int req_fd[2], &ret_flags, NULL); /* time rec */ + logmsg_gss(LL_TRACE, lnd->lnd_mech, maj_stat, min_stat, + "gss_init_sec_context"); + + logmsg(LL_TRACE, "send_token:\n"); + log_hexl(LL_TRACE, send_token.value, send_token.length); + if (recv_tokenp != GSS_C_NO_BUFFER) { gss_release_buffer(&min_stat, &gr.gr_token); recv_tokenp = GSS_C_NO_BUFFER; diff --git a/lustre/utils/gss/lgss_utils.c b/lustre/utils/gss/lgss_utils.c index 35641c3..b29151d 100644 --- a/lustre/utils/gss/lgss_utils.c +++ b/lustre/utils/gss/lgss_utils.c @@ -335,6 +335,45 @@ void __logmsg_gss(loglevel_t level, const char *func, const gss_OID mech, gss_release_buffer(&min_stat2, &min_gss_buf); } +void log_hexl(int pri, unsigned char *cp, int length) +{ + logmsg(pri, "length %d\n", length); + log_hex(pri, cp, length); +} + +void log_hex(int pri, unsigned char *cp, int length) +{ + int i, j, jm; + unsigned char c; + char buffer[66]; + char *p; + + for (i = 0; i < length; i += 0x10) { + memset(buffer, ' ', sizeof(buffer)); + buffer[sizeof(buffer) - 1] = '\0'; + + p = buffer; + sprintf(p, " %04x: ", (unsigned int)i); + p += 8; + jm = length - i; + jm = jm > 16 ? 16 : jm; + + for (j = 0; j < jm; j++) + p += sprintf(p, "%02x%s", (unsigned int)cp[i + j], + j % 2 == 1 ? " " : ""); + *p = ' '; + for (; j < 16; j++) + p += 2 + (j % 2); + p++; + + for (j = 0; j < jm; j++) { + c = cp[i + j]; + sprintf(p++, "%c", isprint(c) ? c : '.'); + } + logmsg(pri, "%s", buffer); + } +} + /**************************************** * client credentials * ****************************************/ diff --git a/lustre/utils/gss/lgss_utils.h b/lustre/utils/gss/lgss_utils.h index bfd7858..5e4f23b 100644 --- a/lustre/utils/gss/lgss_utils.h +++ b/lustre/utils/gss/lgss_utils.h @@ -89,6 +89,9 @@ void __logmsg_gss(loglevel_t level, const char *func, const gss_OID mech, uint32_t major, uint32_t minor, const char *format, ...) __attribute__((format(printf, 6, 7))); +void log_hexl(int pri, unsigned char *cp, int length); +void log_hex(int pri, unsigned char *cp, int length); + #define logmsg(loglevel, format, args...) \ do { \ if (loglevel <= g_log_level) \ diff --git a/lustre/utils/gss/svcgssd_proc.c b/lustre/utils/gss/svcgssd_proc.c index 0ae8301..b3c9889 100644 --- a/lustre/utils/gss/svcgssd_proc.c +++ b/lustre/utils/gss/svcgssd_proc.c @@ -163,7 +163,7 @@ static int do_svc_downcall(gss_buffer_desc *out_handle, struct svc_cred *cred, printerr(LL_DEBUG, "writing downcall data, size %d\n", size); if (write(fd, rsc_dd, size) == -1) { rc = -errno; - printerr(LL_ERR, "ERROR: %s: failed to write message: %s\n", + printerr(LL_ERR, "ERROR: %s failed: %s\n", __func__, strerror(-rc)); } printerr(LL_DEBUG, "downcall data written ok\n"); @@ -256,7 +256,7 @@ static int send_response(int auth_res, uint64_t hash, printerr(LL_DEBUG, "writing response, size %d\n", size); if (write(fd, rsi_dd, size) == -1) { rc = -errno; - printerr(LL_ERR, "ERROR: %s: failed to write message: %s\n", + printerr(LL_ERR, "ERROR: %s failed: %s\n", __func__, strerror(-rc)); } printerr(LL_DEBUG, "response written ok\n"); -- 1.8.3.1