LU-18933 gss: allow expired ctx from server for LDLM callback 40/58840/5
author Sebastien Buisson <sbuisson@ddn.com>
Thu, 17 Apr 2025 14:45:55 +0000 (16:45 +0200)
committer Oleg Drokin <green@whamcloud.com>
Tue, 27 May 2025 04:05:30 +0000 (04:05 +0000)
When a client receives an ldlm callback request to release a lock,
the server may have used an expired reverse context to send this
request. The server may have no other choice, as it only holds a
reverse context and cannot refresh it explicitly. If the client then
refuses to use the associated gss context, it fails to reply to the
ldlm callback request and gets evicted.
To avoid this eviction, the client needs to accept the expired gss
context associated with the server request. The client cannot refresh
its own context immediately, as it would no longer match the reverse
context used by the server. But the client context is going to be
refreshed right after that anyway, along with the subsequent ldlm
cancel request.
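
To summarize the mechanism before the diffs below: an expired server-context
cache entry is no longer unconditionally unlinked; a new accept_expired hook
lets the gss layer keep it when it is a client-side reverse context. The
standalone userspace sketch below only models that decision; the struct and
function names in it are illustrative, not the Lustre types touched by the
patch.

/* Standalone sketch (illustrative names, not the Lustre structs): models
 * how an expired entry is only kept when the backend accepts it, i.e. when
 * it is a client-side reverse context needed to answer an ldlm callback.
 */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct cache_entry {
	time_t expire;          /* absolute expiry time */
	bool   client_side_ctx; /* client-side rpc sec (reverse) context? */
};

/* Same idea as rsc_accept_expired(): only a client-side context may still
 * be used after expiry, so the ldlm callback reply can be sent and the
 * context refreshed right afterwards. */
static int accept_expired(const struct cache_entry *e)
{
	return e->client_side_ctx ? 1 : 0;
}

/* Same idea as the reworked check_unlink_entry():
 * 0 = valid, 1 = unusable (unlinked), -1 = expired but acceptable. */
static int check_entry(const struct cache_entry *e, time_t now)
{
	if (now < e->expire)
		return 0;
	return accept_expired(e) ? -1 : 1;
}

int main(void)
{
	time_t now = time(NULL);
	struct cache_entry reverse = { .expire = now - 30, .client_side_ctx = true };
	struct cache_entry regular = { .expire = now - 30, .client_side_ctx = false };

	printf("client-side reverse ctx: %d (expired but still usable)\n",
	       check_entry(&reverse, now));
	printf("other ctx: %d (expired, dropped)\n",
	       check_entry(&regular, now));
	return 0;
}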

sanity-krb5 test_201 is added to exercise this use case.

Test-Parameters: kerberos=true testlist=sanity-krb5
Signed-off-by: Sebastien Buisson <sbuisson@ddn.com>
Change-Id: I9bc82731cb7013e07cc09295cb488f52a0034ea9
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/58840
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Aurelien Degremont <adegremont@nvidia.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/upcall_cache.h
lustre/obdclass/upcall_cache.c
lustre/ptlrpc/gss/gss_svc_upcall.c
lustre/tests/sanity-krb5.sh

diff --git a/lustre/include/upcall_cache.h b/lustre/include/upcall_cache.h
index caf418b..1637a76 100644
@@ -124,6 +124,8 @@ struct upcall_cache_ops {
                                     struct upcall_cache_entry *);
        int             (*parse_downcall)(struct upcall_cache *,
                                          struct upcall_cache_entry *, void *);
+       int             (*accept_expired)(struct upcall_cache *,
+                                         struct upcall_cache_entry *);
 };
 
 struct upcall_cache {
diff --git a/lustre/obdclass/upcall_cache.c b/lustre/obdclass/upcall_cache.c
index d7cf1fb..7d493d2 100644
@@ -67,6 +67,15 @@ static inline int downcall_compare(struct upcall_cache *cache,
        return 0;
 }
 
+static inline int accept_expired(struct upcall_cache *cache,
+                                struct upcall_cache_entry *entry)
+{
+       if (cache->uc_ops->accept_expired)
+               return cache->uc_ops->accept_expired(cache, entry);
+
+       return 0;
+}
+
 static inline void write_lock_from_read(rwlock_t *lock, bool *writelock)
 {
        if (!*writelock) {
@@ -76,11 +85,17 @@ static inline void write_lock_from_read(rwlock_t *lock, bool *writelock)
        }
 }
 
+/* Return value:
+ * 0 for suitable entry
+ * 1 for unsuitable entry
+ * -1 for expired entry
+ */
 static int check_unlink_entry(struct upcall_cache *cache,
                              struct upcall_cache_entry *entry,
                              bool writelock)
 {
        time64_t now = ktime_get_seconds();
+       int accept_exp = 0;
 
        if (UC_CACHE_IS_VALID(entry) && now < entry->ue_expire)
                return 0;
@@ -98,12 +113,13 @@ static int check_unlink_entry(struct upcall_cache *cache,
                UC_CACHE_SET_EXPIRED(entry);
        }
 
-       if (writelock) {
+       accept_exp = accept_expired(cache, entry);
+       if (writelock && !accept_exp) {
                list_del_init(&entry->ue_hash);
                if (!atomic_read(&entry->ue_refcount))
                        free_entry(cache, entry);
        }
-       return 1;
+       return accept_exp ? -1 : 1;
 }
 
 int upcall_cache_set_upcall(struct upcall_cache *cache, const char *buffer,
@@ -155,13 +171,14 @@ struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache,
                                                  __u64 key, void *args)
 {
        struct upcall_cache_entry *entry = NULL, *new = NULL, *next;
+       struct upcall_cache_entry *best_exp;
        gid_t fsgid = (__u32)__kgid_val(INVALID_GID);
        struct group_info *ginfo = NULL;
        bool failedacquiring = false;
        struct list_head *head;
        wait_queue_entry_t wait;
        bool writelock;
-       int rc = 0, found;
+       int rc = 0, rc2, found;
 
        ENTRY;
 
@@ -179,9 +196,18 @@ find_again:
                writelock = false;
        }
 find_with_lock:
+       best_exp = NULL;
        list_for_each_entry_safe(entry, next, head, ue_hash) {
                /* check invalid & expired items */
-               if (check_unlink_entry(cache, entry, writelock))
+               rc2 = check_unlink_entry(cache, entry, writelock);
+               if (rc2 == -1) {
+                       /* look for most recent expired entry */
+                       if (upcall_compare(cache, entry, key, args) == 0 &&
+                           (!best_exp ||
+                            entry->ue_expire > best_exp->ue_expire))
+                               best_exp = entry;
+               }
+               if (rc2)
                        continue;
                if (upcall_compare(cache, entry, key, args) == 0) {
                        found = 1;
@@ -190,6 +216,22 @@ find_with_lock:
        }
 
        if (!found) {
+               if (best_exp) {
+                       if (!writelock) {
+                               /* We found an expired but potentially usable
+                                * entry while holding the read lock, so convert
+                                * it to a write lock and find again, to check
+                                * that entry was not modified/freed in between.
+                                */
+                               write_lock_from_read(&cache->uc_lock,
+                                                    &writelock);
+                               goto find_with_lock;
+                       }
+                       /* let's use that expired entry */
+                       entry = best_exp;
+                       get_entry(entry);
+                       goto out;
+               }
                if (!new) {
                        if (writelock)
                                write_unlock(&cache->uc_lock);
@@ -219,6 +261,11 @@ find_with_lock:
                        found = 0;
                        goto find_with_lock;
                }
+               if (best_exp) {
+                       list_del_init(&best_exp->ue_hash);
+                       if (!atomic_read(&best_exp->ue_refcount))
+                               free_entry(cache, best_exp);
+               }
                list_move(&entry->ue_hash, head);
        }
        /* now we hold a write lock */
@@ -333,6 +380,11 @@ out:
                read_unlock(&cache->uc_lock);
        if (ginfo)
                groups_free(ginfo);
+       if (IS_ERR(entry))
+               CDEBUG(D_OTHER, "no entry found: rc = %ld\n", PTR_ERR(entry));
+       else
+               CDEBUG(D_OTHER, "found entry %p flags 0x%x\n",
+                      entry, entry->ue_flags);
        RETURN(entry);
 }
 EXPORT_SYMBOL(upcall_cache_get_entry);
diff --git a/lustre/ptlrpc/gss/gss_svc_upcall.c b/lustre/ptlrpc/gss/gss_svc_upcall.c
index d21ed4e..83449d3 100644
@@ -607,6 +607,44 @@ out:
        RETURN(status);
 }
 
+/* Returns 1 to tell the expired entry is acceptable */
+static inline int rsc_accept_expired(struct upcall_cache *cache,
+                                    struct upcall_cache_entry *entry)
+{
+       struct gss_rsc *rsc;
+       time64_t now = ktime_get_seconds();
+
+       if (!entry)
+               return 0;
+
+       rsc = &entry->u.rsc;
+
+       /* entry not expired? */
+       if (now < entry->ue_expire)
+               return 0;
+
+       /* We want to accept an expired entry in the following case:
+        * the client received an ldlm callback request to release a lock,
+        * and the server used an expired reverse context to send this request.
+        * The server cannot be blamed for that, as it only has a reverse
+        * context and cannot refresh it explicitly. And the client cannot
+        * refuse to use the associated gss context, otherwise it fails to reply
+        * to the ldlm callback request and gets evicted. The client, which is
+        * responsible for the context, cannot refresh it immediately, as it
+        * would not match the reverse context used by the server. But the
+        * client context is going to be refreshed right after that, along with
+        * the subsequent ldlm cancel request.
+        * The way to make sure we are presently dealing with a client-side
+        * rpc sec context is to check that sc_target is not NULL and
+        * gsc_rvs_hdl is empty. On server side gsc_rvs_hdl (the reverse handle)
+        * is always set.
+        */
+       if (rsc->sc_target && rawobj_empty(&rsc->sc_ctx.gsc_rvs_hdl))
+               return 1;
+
+       return 0;
+}
+
 struct gss_rsc *rsc_entry_get(struct upcall_cache *cache, struct gss_rsc *rsc)
 {
        struct upcall_cache_entry *entry;
@@ -639,6 +677,7 @@ struct upcall_cache_ops rsc_upcall_cache_ops = {
        .downcall_compare = rsc_downcall_compare,
        .do_upcall        = rsc_do_upcall,
        .parse_downcall   = rsc_parse_downcall,
+       .accept_expired   = rsc_accept_expired,
 };
 
 struct upcall_cache *rsccache;
@@ -661,6 +700,8 @@ static struct gss_rsc *gss_svc_searchbyctx(rawobj_t *handle)
        if (IS_ERR_OR_NULL(found))
                return found;
        if (!found->sc_ctx.gsc_mechctx) {
+               CWARN("ctx hdl %#llx does not have mech ctx: rc = %d\n",
+                     gss_handle_to_u64(handle), -ENOENT);
                rsc_entry_put(rsccache, found);
                return ERR_PTR(-ENOENT);
        }
@@ -966,9 +1007,10 @@ struct gss_svc_ctx *gss_svc_upcall_get_ctx(struct ptlrpc_request *req,
 
        rscp = gss_svc_searchbyctx(&gw->gw_handle);
        if (IS_ERR_OR_NULL(rscp)) {
-               CWARN("Invalid gss ctx hdl %#llx from %s\n",
+               CWARN("Invalid gss ctx hdl %#llx from %s: rc = %ld\n",
                      gss_handle_to_u64(&gw->gw_handle),
-                     libcfs_nidstr(&req->rq_peer.nid));
+                     libcfs_nidstr(&req->rq_peer.nid),
+                     rscp ? PTR_ERR(rscp) : -1);
                return NULL;
        }
 
diff --git a/lustre/tests/sanity-krb5.sh b/lustre/tests/sanity-krb5.sh
index 8489c24..d390d91 100755
@@ -1039,6 +1039,78 @@ test_200() {
 }
 run_test 200 "check expired reverse gss contexts"
 
+cleanup_201() {
+       # unmount to get rid of old context
+       umount_client $MOUNT
+       kdestroy
+       if is_mounted $MOUNT2; then
+               umount_client $MOUNT2
+       fi
+
+       # restore original krb5.conf
+       cp -f /etc/krb5.conf.bkp /etc/krb5.conf
+       rm -f /etc/krb5.conf.bkp
+
+       # remount client
+       mount_client $MOUNT ${MOUNT_OPTS} || error "mount $MOUNT failed"
+       if is_mounted $MOUNT2; then
+               mount_client $MOUNT2 ${MOUNT_OPTS} ||
+                       error "mount $MOUNT2 failed"
+       fi
+}
+
+test_201() {
+       local nid=$(lctl list_nids | grep ${NETTYPE} | head -n1)
+       local nidstr="peer_nid: ${nid},"
+       local count
+
+       lfs df -h
+       $LFS mkdir -i 0 -c 1 $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+       stack_trap cleanup_201 EXIT
+
+       # unmount to get rid of old context
+       umount_client $MOUNT || error "umount $MOUNT failed"
+       kdestroy
+       if is_mounted $MOUNT2; then
+               umount_client $MOUNT2 || error "umount $MOUNT2 failed"
+       fi
+
+       # update ticket lifetime to be 90s
+       sed -i.bkp s+[^#]ticket_lifetime.*+ticket_lifetime\ =\ 90s+ \
+               /etc/krb5.conf
+       # establish new contexts
+       mount_client $MOUNT ${MOUNT_OPTS} || error "remount failed"
+       mount_client $MOUNT2 ${MOUNT_OPTS} || error "remount 2 failed"
+       lfs df -h
+
+       # have ldlm lock on first mount
+       touch $DIR/${tfile}_1
+       stack_trap "rm -f $DIR/${tfile}*" EXIT
+       # and make second mount take it
+       touch $DIR2/$tdir/file001
+
+       # wait lifetime + 30s to have expired contexts
+       echo Wait for gss contexts to expire... 120s
+       sleep 120
+
+       do_facet $SINGLEMDS $LCTL get_param -n \
+               mdt.*-MDT0000.gss.srpc_serverctx | grep "$nidstr"
+       count=$(do_facet $SINGLEMDS $LCTL get_param -n \
+               mdt.*-MDT0000.gss.srpc_serverctx | grep "$nidstr" |
+               grep -vc 'delta: -')
+       echo "found $count valid reverse contexts"
+       (( count == 0 )) || error "all contexts should have expired"
+
+       # make first mount reclaim ldlm lock
+       touch $DIR/${tfile}_2
+       $LFS df $MOUNT2
+       # this should not evict the second mount
+       client_evicted $HOSTNAME && error "client got evicted"
+
+       exit 0
+}
+run_test 201 "allow expired ctx for ldlm callback"
+
 complete_test $SECONDS
 set_flavor_all null
 cleanup_gss