Lustre's upcall cache has a retry mechanism in case the upcall was
interrupted or failed and we timed out waiting. In this case we do our
best to retry and do the upcall again.
However, when the upcall cache is used for GSS contexts, the upcall
cannot be done twice with the same data. The GSSAPI implements security
measures that forbid that kind of request replay, to prevent
man-in-the-middle attacks for instance.
Add a new uc_acquire_replay field to struct upcall_cache, so that
upcall cache users can tell whether the acquire upcall can be replayed.
For the identity upcall, this replay is fine. But for GSS contexts we
need to avoid those replays.
Also bump the upcall cache acquire timeout from 20s to 30s for GSS
context init requests.
In addition, add more debug messages to the gss code on both the client
and server sides, in both kernel and userspace.
Lustre-change: https://review.whamcloud.com/52689
Lustre-commit: d0194a4b5f6efa26d5473c2793b525f5fdb77e67
Test-Parameters: kerberos=true testlist=sanity-krb5
Signed-off-by: Sebastien Buisson <sbuisson@ddn.com>
Change-Id: I56decc83a4f0d21be420e87cb0417826011932af
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Aurelien Degremont <adegremont@nvidia.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53255
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
char uc_name[40]; /* for upcall */
char uc_upcall[UC_CACHE_UPCALL_MAXPATH];
+ bool uc_acquire_replay;
time64_t uc_acquire_expire; /* seconds */
time64_t uc_entry_expire; /* seconds */
struct upcall_cache_ops *uc_ops;
void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args);
struct upcall_cache *upcall_cache_init(const char *name, const char *upcall,
int hashsz, time64_t entry_expire,
- time64_t acquire_expire,
+ time64_t acquire_expire, bool replayable,
struct upcall_cache_ops *ops);
void upcall_cache_cleanup(struct upcall_cache *cache);
UC_IDCACHE_HASH_SIZE,
1200, /* entry expire: 20 mn */
30, /* acquire expire: 30 s */
+ true, /* acquire can replay */
&mdt_identity_upcall_cache_ops);
if (IS_ERR(m->mdt_identity_cache)) {
rc = PTR_ERR(m->mdt_identity_cache);
entry->ue_key = key;
atomic_set(&entry->ue_refcount, 0);
init_waitqueue_head(&entry->ue_waitq);
+ entry->ue_acquire_expire = 0;
+ entry->ue_expire = 0;
if (cache->uc_ops->init_entry)
cache->uc_ops->init_entry(entry, args);
return entry;
if (UC_CACHE_IS_ACQUIRING(entry)) {
/* we're interrupted or upcall failed in the middle */
rc = left > 0 ? -EINTR : -ETIMEDOUT;
+ /* if we waited uc_acquire_expire, we can try again
+ * with same data, but only if acquire is replayable
+ */
+ if (left <= 0 && !cache->uc_acquire_replay)
+ failedacquiring = true;
put_entry(cache, entry);
if (!failedacquiring) {
spin_unlock(&cache->uc_lock);
if (err) {
CDEBUG(D_OTHER, "%s: upcall for key %llu returned %d\n",
cache->uc_name, entry->ue_key, err);
- GOTO(out, rc = -EINVAL);
+ GOTO(out, rc = err);
}
if (!UC_CACHE_IS_ACQUIRING(entry)) {
struct upcall_cache *upcall_cache_init(const char *name, const char *upcall,
int hashsz, time64_t entry_expire,
- time64_t acquire_expire,
+ time64_t acquire_expire, bool replayable,
struct upcall_cache_ops *ops)
{
struct upcall_cache *cache;
strlcpy(cache->uc_upcall, upcall, sizeof(cache->uc_upcall));
cache->uc_entry_expire = entry_expire;
cache->uc_acquire_expire = acquire_expire;
+ cache->uc_acquire_replay = replayable;
cache->uc_ops = ops;
RETURN(cache);
param.status = rc;
if (rc != -EACCES)
param.status = -ETIMEDOUT;
+ CDEBUG(D_SEC,
+ "%s: ctx init req got %d, returning to userspace status %llu\n",
+ obd->obd_name, rc, param.status);
goto out_copy;
}
rsicache = upcall_cache_init(RSI_CACHE_NAME, RSI_UPCALL_PATH,
UC_RSICACHE_HASH_SIZE,
3600, /* entry expire: 1 h */
- 20, /* acquire expire: 20 s */
+ 30, /* acquire expire: 30 s */
+ false, /* can't replay acquire */
&rsi_upcall_cache_ops);
if (IS_ERR(rsicache)) {
rc = PTR_ERR(rsicache);
UC_RSCCACHE_HASH_SIZE,
3600, /* replaced with one from mech */
100, /* arbitrary, not used */
+ false, /* can't replay acquire */
&rsc_upcall_cache_ops);
if (IS_ERR(rsccache)) {
upcall_cache_cleanup(rsicache);
&ret_flags,
NULL); /* time rec */
+ logmsg_gss(LL_TRACE, lnd->lnd_mech, maj_stat, min_stat,
+ "gss_init_sec_context");
+
+ logmsg(LL_TRACE, "send_token:\n");
+ log_hexl(LL_TRACE, send_token.value, send_token.length);
+
if (recv_tokenp != GSS_C_NO_BUFFER) {
gss_release_buffer(&min_stat, &gr.gr_token);
recv_tokenp = GSS_C_NO_BUFFER;
gss_release_buffer(&min_stat2, &min_gss_buf);
}
+/* Log the buffer length at priority pri, then hand the same buffer to
+ * log_hex() so its contents are dumped at that priority too.
+ */
+void log_hexl(int pri, unsigned char *cp, int length)
+{
+ logmsg(pri, "length %d\n", length);
+ log_hex(pri, cp, length);
+}
+
+/*
+ * Log a buffer as a hex dump, issuing one logmsg() call per 16 input
+ * bytes.
+ *
+ * Each line holds the offset of its first byte in hex, the bytes in hex
+ * with a blank after every second byte, and the same bytes as characters
+ * with non-printable ones shown as '.'.  A short final line has its
+ * missing hex columns left blank so the character column stays aligned.
+ *
+ * pri:    log level passed through to logmsg()
+ * cp:     buffer to dump
+ * length: number of bytes to dump; nothing is logged when <= 0
+ */
+void log_hex(int pri, unsigned char *cp, int length)
+{
+	int i, j, jm;
+	unsigned char c;
+	/* offset prefix (up to 8 hex digits) + 40 hex columns + 1 blank +
+	 * 16 characters + NUL fits with room to spare
+	 */
+	char buffer[80];
+	char *p;
+
+	for (i = 0; i < length; i += 0x10) {
+		/* pre-fill with blanks: skipped hex columns become padding */
+		memset(buffer, ' ', sizeof(buffer));
+		buffer[sizeof(buffer) - 1] = '\0';
+
+		/* advance by what sprintf() really wrote, so the NUL it
+		 * leaves behind is overwritten by the next write instead of
+		 * cutting the line short
+		 */
+		p = buffer;
+		p += sprintf(p, " %04x: ", (unsigned int)i);
+		jm = length - i;
+		jm = jm > 16 ? 16 : jm;
+
+		for (j = 0; j < jm; j++)
+			p += sprintf(p, "%02x%s", (unsigned int)cp[i + j],
+				     j % 2 == 1 ? " " : "");
+		/* turn the NUL from the last sprintf() back into a blank */
+		*p = ' ';
+		/* step over the hex columns a short last line does not use */
+		for (; j < 16; j++)
+			p += 2 + (j % 2);
+		p++;
+
+		/* each sprintf() also writes a NUL after the character, so
+		 * the string ends right after the last one
+		 */
+		for (j = 0; j < jm; j++) {
+			c = cp[i + j];
+			sprintf(p++, "%c", isprint(c) ? c : '.');
+		}
+		logmsg(pri, "%s", buffer);
+	}
+}
+
/****************************************
* client credentials *
****************************************/
uint32_t major, uint32_t minor, const char *format, ...)
__attribute__((format(printf, 6, 7)));
+void log_hexl(int pri, unsigned char *cp, int length);
+void log_hex(int pri, unsigned char *cp, int length);
+
#define logmsg(loglevel, format, args...) \
do { \
if (loglevel <= g_log_level) \
printerr(LL_DEBUG, "writing downcall data, size %d\n", size);
if (write(fd, rsc_dd, size) == -1) {
rc = -errno;
- printerr(LL_ERR, "ERROR: %s: failed to write message: %s\n",
+ printerr(LL_ERR, "ERROR: %s failed: %s\n",
__func__, strerror(-rc));
}
printerr(LL_DEBUG, "downcall data written ok\n");
printerr(LL_DEBUG, "writing response, size %d\n", size);
if (write(fd, rsi_dd, size) == -1) {
rc = -errno;
- printerr(LL_ERR, "ERROR: %s: failed to write message: %s\n",
+ printerr(LL_ERR, "ERROR: %s failed: %s\n",
__func__, strerror(-rc));
}
printerr(LL_DEBUG, "response written ok\n");