LU-18933 gss: allow expired ctx from server for LDLM callback 40/58840/5
author Sebastien Buisson <sbuisson@ddn.com>
Thu, 17 Apr 2025 14:45:55 +0000 (16:45 +0200)
committer Oleg Drokin <green@whamcloud.com>
Tue, 27 May 2025 04:05:30 +0000 (04:05 +0000)
When a client receives an ldlm callback request to release a lock,
the server may have used an expired reverse context to send this
request. The server may have no other choice, as it only holds a
reverse context and cannot refresh it explicitly. If the client then
refuses to use the associated gss context, it fails to reply to the
ldlm callback request and gets evicted.
To avoid this eviction, the client needs to accept the expired gss
context associated with the server request. The client cannot refresh
its own context immediately, as it would no longer match the reverse
context used by the server. But the client context is going to be
refreshed right after that anyway, along with the subsequent ldlm
cancel request.
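
To summarize the mechanism before the diffs below: an expired server-context
cache entry is no longer unconditionally unlinked; a new accept_expired hook
lets the gss layer keep it when it is a client-side reverse context. The
standalone userspace sketch below only models that decision; the struct and
function names in it are illustrative, not the Lustre types touched by the
patch.

/* Standalone sketch (illustrative names, not the Lustre structs): models
 * how an expired entry is only kept when the backend accepts it, i.e. when
 * it is a client-side reverse context needed to answer an ldlm callback.
 */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct cache_entry {
	time_t expire;          /* absolute expiry time */
	bool   client_side_ctx; /* client-side rpc sec (reverse) context? */
};

/* Same idea as rsc_accept_expired(): only a client-side context may still
 * be used after expiry, so the ldlm callback reply can be sent and the
 * context refreshed right afterwards. */
static int accept_expired(const struct cache_entry *e)
{
	return e->client_side_ctx ? 1 : 0;
}

/* Same idea as the reworked check_unlink_entry():
 * 0 = valid, 1 = unusable (unlinked), -1 = expired but acceptable. */
static int check_entry(const struct cache_entry *e, time_t now)
{
	if (now < e->expire)
		return 0;
	return accept_expired(e) ? -1 : 1;
}

int main(void)
{
	time_t now = time(NULL);
	struct cache_entry reverse = { .expire = now - 30, .client_side_ctx = true };
	struct cache_entry regular = { .expire = now - 30, .client_side_ctx = false };

	printf("client-side reverse ctx: %d (expired but still usable)\n",
	       check_entry(&reverse, now));
	printf("other ctx: %d (expired, dropped)\n",
	       check_entry(&regular, now));
	return 0;
}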

sanity-krb5 test_201 is added to exercise this use case.

Test-Parameters: kerberos=true testlist=sanity-krb5
Signed-off-by: Sebastien Buisson <sbuisson@ddn.com>
Change-Id: I9bc82731cb7013e07cc09295cb488f52a0034ea9
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/58840
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Aurelien Degremont <adegremont@nvidia.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/upcall_cache.h
lustre/obdclass/upcall_cache.c
lustre/ptlrpc/gss/gss_svc_upcall.c
lustre/tests/sanity-krb5.sh

diff --git a/lustre/include/upcall_cache.h b/lustre/include/upcall_cache.h
index caf418b..1637a76 100644
@@ -124,6 +124,8 @@ struct upcall_cache_ops {
                                     struct upcall_cache_entry *);
        int             (*parse_downcall)(struct upcall_cache *,
                                          struct upcall_cache_entry *, void *);
+       int             (*accept_expired)(struct upcall_cache *,
+                                         struct upcall_cache_entry *);
 };
 
 struct upcall_cache {
diff --git a/lustre/obdclass/upcall_cache.c b/lustre/obdclass/upcall_cache.c
index d7cf1fb..7d493d2 100644
@@ -67,6 +67,15 @@ static inline int downcall_compare(struct upcall_cache *cache,
        return 0;
 }
 
+static inline int accept_expired(struct upcall_cache *cache,
+                                struct upcall_cache_entry *entry)
+{
+       if (cache->uc_ops->accept_expired)
+               return cache->uc_ops->accept_expired(cache, entry);
+
+       return 0;
+}
+
 static inline void write_lock_from_read(rwlock_t *lock, bool *writelock)
 {
        if (!*writelock) {
@@ -76,11 +85,17 @@ static inline void write_lock_from_read(rwlock_t *lock, bool *writelock)
        }
 }
 
+/* Return value:
+ * 0 for suitable entry
+ * 1 for unsuitable entry
+ * -1 for expired entry
+ */
 static int check_unlink_entry(struct upcall_cache *cache,
                              struct upcall_cache_entry *entry,
                              bool writelock)
 {
        time64_t now = ktime_get_seconds();
+       int accept_exp = 0;
 
        if (UC_CACHE_IS_VALID(entry) && now < entry->ue_expire)
                return 0;
@@ -98,12 +113,13 @@ static int check_unlink_entry(struct upcall_cache *cache,
                UC_CACHE_SET_EXPIRED(entry);
        }
 
-       if (writelock) {
+       accept_exp = accept_expired(cache, entry);
+       if (writelock && !accept_exp) {
                list_del_init(&entry->ue_hash);
                if (!atomic_read(&entry->ue_refcount))
                        free_entry(cache, entry);
        }
-       return 1;
+       return accept_exp ? -1 : 1;
 }
 
 int upcall_cache_set_upcall(struct upcall_cache *cache, const char *buffer,
@@ -155,13 +171,14 @@ struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache,
                                                  __u64 key, void *args)
 {
        struct upcall_cache_entry *entry = NULL, *new = NULL, *next;
+       struct upcall_cache_entry *best_exp;
        gid_t fsgid = (__u32)__kgid_val(INVALID_GID);
        struct group_info *ginfo = NULL;
        bool failedacquiring = false;
        struct list_head *head;
        wait_queue_entry_t wait;
        bool writelock;
-       int rc = 0, found;
+       int rc = 0, rc2, found;
 
        ENTRY;
 
@@ -179,9 +196,18 @@ find_again:
                writelock = false;
        }
 find_with_lock:
+       best_exp = NULL;
        list_for_each_entry_safe(entry, next, head, ue_hash) {
                /* check invalid & expired items */
-               if (check_unlink_entry(cache, entry, writelock))
+               rc2 = check_unlink_entry(cache, entry, writelock);
+               if (rc2 == -1) {
+                       /* look for most recent expired entry */
+                       if (upcall_compare(cache, entry, key, args) == 0 &&
+                           (!best_exp ||
+                            entry->ue_expire > best_exp->ue_expire))
+                               best_exp = entry;
+               }
+               if (rc2)
                        continue;
                if (upcall_compare(cache, entry, key, args) == 0) {
                        found = 1;
@@ -190,6 +216,22 @@ find_with_lock:
        }
 
        if (!found) {
+               if (best_exp) {
+                       if (!writelock) {
+                               /* We found an expired but potentially usable
+                                * entry while holding the read lock, so convert
+                                * it to a write lock and find again, to check
+                                * that entry was not modified/freed in between.
+                                */
+                               write_lock_from_read(&cache->uc_lock,
+                                                    &writelock);
+                               goto find_with_lock;
+                       }
+                       /* let's use that expired entry */
+                       entry = best_exp;
+                       get_entry(entry);
+                       goto out;
+               }
                if (!new) {
                        if (writelock)
                                write_unlock(&cache->uc_lock);
@@ -219,6 +261,11 @@ find_with_lock:
                        found = 0;
                        goto find_with_lock;
                }
+               if (best_exp) {
+                       list_del_init(&best_exp->ue_hash);
+                       if (!atomic_read(&best_exp->ue_refcount))
+                               free_entry(cache, best_exp);
+               }
                list_move(&entry->ue_hash, head);
        }
        /* now we hold a write lock */
@@ -333,6 +380,11 @@ out:
                read_unlock(&cache->uc_lock);
        if (ginfo)
                groups_free(ginfo);
+       if (IS_ERR(entry))
+               CDEBUG(D_OTHER, "no entry found: rc = %ld\n", PTR_ERR(entry));
+       else
+               CDEBUG(D_OTHER, "found entry %p flags 0x%x\n",
+                      entry, entry->ue_flags);
        RETURN(entry);
 }
 EXPORT_SYMBOL(upcall_cache_get_entry);
diff --git a/lustre/ptlrpc/gss/gss_svc_upcall.c b/lustre/ptlrpc/gss/gss_svc_upcall.c
index d21ed4e..83449d3 100644
@@ -607,6 +607,44 @@ out:
        RETURN(status);
 }
 
+/* Returns 1 to tell the expired entry is acceptable */
+static inline int rsc_accept_expired(struct upcall_cache *cache,
+                                    struct upcall_cache_entry *entry)
+{
+       struct gss_rsc *rsc;
+       time64_t now = ktime_get_seconds();
+
+       if (!entry)
+               return 0;
+
+       rsc = &entry->u.rsc;
+
+       /* entry not expired? */
+       if (now < entry->ue_expire)
+               return 0;
+
+       /* We want to accept an expired entry in the following case:
+        * the client received an ldlm callback request to release a lock,
+        * and the server used an expired reverse context to send this request.
+        * The server cannot be blamed for that, as it only has a reverse
+        * context and cannot refresh it explicitly. And the client cannot
+        * refuse to use the associated gss context, otherwise it fails to reply
+        * to the ldlm callback request and gets evicted. The client, which is
+        * responsible for the context, cannot refresh it immediately, as it
+        * would not match the reverse context used by the server. But the
+        * client context is going to be refreshed right after that, along with
+        * the subsequent ldlm cancel request.
+        * The way to make sure we are presently dealing with a client-side
+        * rpc sec context is to check that sc_target is not NULL and
+        * gsc_rvs_hdl is empty. On server side gsc_rvs_hdl (the reverse handle)
+        * is always set.
+        */
+       if (rsc->sc_target && rawobj_empty(&rsc->sc_ctx.gsc_rvs_hdl))
+               return 1;
+
+       return 0;
+}
+
 struct gss_rsc *rsc_entry_get(struct upcall_cache *cache, struct gss_rsc *rsc)
 {
        struct upcall_cache_entry *entry;
@@ -639,6 +677,7 @@ struct upcall_cache_ops rsc_upcall_cache_ops = {
        .downcall_compare = rsc_downcall_compare,
        .do_upcall        = rsc_do_upcall,
        .parse_downcall   = rsc_parse_downcall,
+       .accept_expired   = rsc_accept_expired,
 };
 
 struct upcall_cache *rsccache;
@@ -661,6 +700,8 @@ static struct gss_rsc *gss_svc_searchbyctx(rawobj_t *handle)
        if (IS_ERR_OR_NULL(found))
                return found;
        if (!found->sc_ctx.gsc_mechctx) {
+               CWARN("ctx hdl %#llx does not have mech ctx: rc = %d\n",
+                     gss_handle_to_u64(handle), -ENOENT);
                rsc_entry_put(rsccache, found);
                return ERR_PTR(-ENOENT);
        }
@@ -966,9 +1007,10 @@ struct gss_svc_ctx *gss_svc_upcall_get_ctx(struct ptlrpc_request *req,
 
        rscp = gss_svc_searchbyctx(&gw->gw_handle);
        if (IS_ERR_OR_NULL(rscp)) {
-               CWARN("Invalid gss ctx hdl %#llx from %s\n",
+               CWARN("Invalid gss ctx hdl %#llx from %s: rc = %ld\n",
                      gss_handle_to_u64(&gw->gw_handle),
-                     libcfs_nidstr(&req->rq_peer.nid));
+                     libcfs_nidstr(&req->rq_peer.nid),
+                     rscp ? PTR_ERR(rscp) : -1);
                return NULL;
        }
 
diff --git a/lustre/tests/sanity-krb5.sh b/lustre/tests/sanity-krb5.sh
index 8489c24..d390d91 100755
@@ -1039,6 +1039,78 @@ test_200() {
 }
 run_test 200 "check expired reverse gss contexts"
 
+cleanup_201() {
+       # unmount to get rid of old context
+       umount_client $MOUNT
+       kdestroy
+       if is_mounted $MOUNT2; then
+               umount_client $MOUNT2
+       fi
+
+       # restore original krb5.conf
+       cp -f /etc/krb5.conf.bkp /etc/krb5.conf
+       rm -f /etc/krb5.conf.bkp
+
+       # remount client
+       mount_client $MOUNT ${MOUNT_OPTS} || error "mount $MOUNT failed"
+       if is_mounted $MOUNT2; then
+               mount_client $MOUNT2 ${MOUNT_OPTS} ||
+                       error "mount $MOUNT2 failed"
+       fi
+}
+
+test_201() {
+       local nid=$(lctl list_nids | grep ${NETTYPE} | head -n1)
+       local nidstr="peer_nid: ${nid},"
+       local count
+
+       lfs df -h
+       $LFS mkdir -i 0 -c 1 $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+       stack_trap cleanup_201 EXIT
+
+       # unmount to get rid of old context
+       umount_client $MOUNT || error "umount $MOUNT failed"
+       kdestroy
+       if is_mounted $MOUNT2; then
+               umount_client $MOUNT2 || error "umount $MOUNT2 failed"
+       fi
+
+       # update ticket lifetime to be 90s
+       sed -i.bkp s+[^#]ticket_lifetime.*+ticket_lifetime\ =\ 90s+ \
+               /etc/krb5.conf
+       # establish new contexts
+       mount_client $MOUNT ${MOUNT_OPTS} || error "remount failed"
+       mount_client $MOUNT2 ${MOUNT_OPTS} || error "remount 2 failed"
+       lfs df -h
+
+       # have ldlm lock on first mount
+       touch $DIR/${tfile}_1
+       stack_trap "rm -f $DIR/${tfile}*" EXIT
+       # and make second mount take it
+       touch $DIR2/$tdir/file001
+
+       # wait lifetime + 30s to have expired contexts
+       echo Wait for gss contexts to expire... 120s
+       sleep 120
+
+       do_facet $SINGLEMDS $LCTL get_param -n \
+               mdt.*-MDT0000.gss.srpc_serverctx | grep "$nidstr"
+       count=$(do_facet $SINGLEMDS $LCTL get_param -n \
+               mdt.*-MDT0000.gss.srpc_serverctx | grep "$nidstr" |
+               grep -vc 'delta: -')
+       echo "found $count valid reverse contexts"
+       (( count == 0 )) || error "all contexts should have expired"
+
+       # make first mount reclaim ldlm lock
+       touch $DIR/${tfile}_2
+       $LFS df $MOUNT2
+       # this should not evict the second mount
+       client_evicted $HOSTNAME && error "client got evicted"
+
+       exit 0
+}
+run_test 201 "allow expired ctx for ldlm callback"
+
 complete_test $SECONDS
 set_flavor_all null
 cleanup_gss