struct upcall_cache_entry *);
int (*parse_downcall)(struct upcall_cache *,
struct upcall_cache_entry *, void *);
+ int (*accept_expired)(struct upcall_cache *,
+ struct upcall_cache_entry *);
};
struct upcall_cache {
return 0;
}
+static inline int accept_expired(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry)
+{
+ if (cache->uc_ops->accept_expired)
+ return cache->uc_ops->accept_expired(cache, entry);
+
+ return 0;
+}
+
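The wrapper above makes the new hook optional: an ops table that leaves
.accept_expired NULL keeps the historical behavior of never tolerating
expired entries. As a minimal sketch of the contract (hypothetical, not
part of this patch: the hook name and the 30s grace period are invented),
a cache user could tolerate recently expired entries like this:

    /* Hypothetical hook: accept entries for a 30s grace period past
     * ue_expire. A non-zero return makes check_unlink_entry() keep the
     * expired entry on the hash list instead of unlinking and freeing it.
     */
    static int example_accept_expired(struct upcall_cache *cache,
                                      struct upcall_cache_entry *entry)
    {
        return ktime_get_seconds() < entry->ue_expire + 30;
    }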
static inline void write_lock_from_read(rwlock_t *lock, bool *writelock)
{
if (!*writelock) {
/* a read lock cannot be promoted in place, so drop it and take
 * the write lock; the cache may change while no lock is held,
 * so callers must re-check what they found under the read lock
 */
read_unlock(lock);
write_lock(lock);
*writelock = true;
}
}
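Because the promotion above must drop the read lock before taking the
write lock, anything observed under the read lock may be stale once the
write lock is held. The lookup code below handles this by jumping back to
find_with_lock after promoting. A generic sketch of the pattern, assuming
a hypothetical example_find() helper (invented for illustration):

    static struct upcall_cache_entry *
    example_lookup(struct upcall_cache *cache, __u64 key, void *args)
    {
        struct upcall_cache_entry *entry;
        bool writelock = false;

        read_lock(&cache->uc_lock);
    again:
        entry = example_find(cache, key, args);
        if (entry && !writelock) {
            /* the lock is dropped during promotion, so the entry
             * may have been modified or freed in between: rescan
             * under the write lock before using it
             */
            write_lock_from_read(&cache->uc_lock, &writelock);
            goto again;
        }
        if (entry)
            get_entry(entry);
        if (writelock)
            write_unlock(&cache->uc_lock);
        else
            read_unlock(&cache->uc_lock);
        return entry;
    }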
+/* Return value:
+ * 0 for a valid, unexpired entry
+ * 1 for an invalid or expired entry that must not be used
+ * -1 for an expired entry that accept_expired() reports as still usable
+ */
static int check_unlink_entry(struct upcall_cache *cache,
struct upcall_cache_entry *entry,
bool writelock)
{
time64_t now = ktime_get_seconds();
+ int accept_exp = 0;
if (UC_CACHE_IS_VALID(entry) && now < entry->ue_expire)
return 0;
UC_CACHE_SET_EXPIRED(entry);
}
- if (writelock) {
+ accept_exp = accept_expired(cache, entry);
+ if (writelock && !accept_exp) {
list_del_init(&entry->ue_hash);
if (!atomic_read(&entry->ue_refcount))
free_entry(cache, entry);
}
- return 1;
+ return accept_exp ? -1 : 1;
}
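The tri-state return value is consumed by the lookup loop in
upcall_cache_get_entry() below. Condensed into a hypothetical helper
(example_scan() is not part of the patch; reference counting and lock
promotion are elided for brevity), the consumption looks like this:

    static struct upcall_cache_entry *
    example_scan(struct upcall_cache *cache, struct list_head *head,
                 __u64 key, void *args, bool writelock)
    {
        struct upcall_cache_entry *entry, *next, *best_exp = NULL;

        list_for_each_entry_safe(entry, next, head, ue_hash) {
            int rc = check_unlink_entry(cache, entry, writelock);

            /* remember the most recent acceptable-expired entry
             * as a fallback
             */
            if (rc == -1 &&
                upcall_compare(cache, entry, key, args) == 0 &&
                (!best_exp || entry->ue_expire > best_exp->ue_expire))
                best_exp = entry;
            if (rc)
                continue;        /* unusable, or fallback only */
            if (upcall_compare(cache, entry, key, args) == 0)
                return entry;    /* valid match */
        }
        return best_exp;
    }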
struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache,
__u64 key, void *args)
{
struct upcall_cache_entry *entry = NULL, *new = NULL, *next;
+ struct upcall_cache_entry *best_exp;
gid_t fsgid = (__u32)__kgid_val(INVALID_GID);
struct group_info *ginfo = NULL;
bool failedacquiring = false;
struct list_head *head;
wait_queue_entry_t wait;
bool writelock;
- int rc = 0, found;
+ int rc = 0, rc2, found;
ENTRY;
writelock = false;
}
find_with_lock:
+ best_exp = NULL;
list_for_each_entry_safe(entry, next, head, ue_hash) {
/* check invalid & expired items */
- if (check_unlink_entry(cache, entry, writelock))
+ rc2 = check_unlink_entry(cache, entry, writelock);
+ if (rc2 == -1) {
+ /* look for most recent expired entry */
+ if (upcall_compare(cache, entry, key, args) == 0 &&
+ (!best_exp ||
+ entry->ue_expire > best_exp->ue_expire))
+ best_exp = entry;
+ }
+ if (rc2)
continue;
if (upcall_compare(cache, entry, key, args) == 0) {
found = 1;
}
if (!found) {
+ if (best_exp) {
+ if (!writelock) {
+ /* We found an expired but potentially usable
+ * entry while holding the read lock, so upgrade
+ * it to a write lock and rescan, to check that
+ * the entry was not modified or freed in between.
+ */
+ write_lock_from_read(&cache->uc_lock,
+ &writelock);
+ goto find_with_lock;
+ }
+ /* let's use that expired entry */
+ entry = best_exp;
+ get_entry(entry);
+ goto out;
+ }
if (!new) {
if (writelock)
write_unlock(&cache->uc_lock);
found = 0;
goto find_with_lock;
}
+ if (best_exp) {
+ list_del_init(&best_exp->ue_hash);
+ if (!atomic_read(&best_exp->ue_refcount))
+ free_entry(cache, best_exp);
+ }
list_move(&entry->ue_hash, head);
}
/* now we hold a write lock */
read_unlock(&cache->uc_lock);
if (ginfo)
groups_free(ginfo);
+ if (IS_ERR(entry))
+ CDEBUG(D_OTHER, "no entry found: rc = %ld\n", PTR_ERR(entry));
+ else
+ CDEBUG(D_OTHER, "found entry %p flags 0x%x\n",
+ entry, entry->ue_flags);
RETURN(entry);
}
EXPORT_SYMBOL(upcall_cache_get_entry);
RETURN(status);
}
+/* Returns 1 if the expired entry is acceptable, 0 otherwise */
+static inline int rsc_accept_expired(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry)
+{
+ struct gss_rsc *rsc;
+ time64_t now = ktime_get_seconds();
+
+ if (!entry)
+ return 0;
+
+ rsc = &entry->u.rsc;
+
+ /* entry not expired? */
+ if (now < entry->ue_expire)
+ return 0;
+
+ /* We want to accept an expired entry in the following case:
+ * the client received an ldlm callback request to release a lock,
+ * and the server used an expired reverse context to send this request.
+ * The server cannot be blamed for that, as it only has a reverse
+ * context and cannot refresh it explicitly. And the client cannot
+ * refuse to use the associated gss context, otherwise it fails to reply
+ * to the ldlm callback request and gets evicted. The client, which is
+ * responsible for the context, cannot refresh it immediately, as it
+ * would not match the reverse context used by the server. But the
+ * client context is going to be refreshed right after that, along with
+ * the subsequent ldlm cancel request.
+ * The way to tell that we are dealing with a client-side rpc sec
+ * context is to check that sc_target is not NULL and gsc_rvs_hdl is
+ * empty: on the server side, gsc_rvs_hdl (the reverse handle) is
+ * always set.
+ */
+ if (rsc->sc_target && rawobj_empty(&rsc->sc_ctx.gsc_rvs_hdl))
+ return 1;
+
+ return 0;
+}
+
struct gss_rsc *rsc_entry_get(struct upcall_cache *cache, struct gss_rsc *rsc)
{
struct upcall_cache_entry *entry;
.downcall_compare = rsc_downcall_compare,
.do_upcall = rsc_do_upcall,
.parse_downcall = rsc_parse_downcall,
+ .accept_expired = rsc_accept_expired,
};
struct upcall_cache *rsccache;
if (IS_ERR_OR_NULL(found))
return found;
if (!found->sc_ctx.gsc_mechctx) {
+ CWARN("ctx hdl %#llx does not have mech ctx: rc = %d\n",
+ gss_handle_to_u64(handle), -ENOENT);
rsc_entry_put(rsccache, found);
return ERR_PTR(-ENOENT);
}
rscp = gss_svc_searchbyctx(&gw->gw_handle);
if (IS_ERR_OR_NULL(rscp)) {
- CWARN("Invalid gss ctx hdl %#llx from %s\n",
+ CWARN("Invalid gss ctx hdl %#llx from %s: rc = %ld\n",
gss_handle_to_u64(&gw->gw_handle),
- libcfs_nidstr(&req->rq_peer.nid));
+ libcfs_nidstr(&req->rq_peer.nid),
+ rscp ? PTR_ERR(rscp) : -1);
return NULL;
}
}
run_test 200 "check expired reverse gss contexts"
+cleanup_201() {
+ # unmount to get rid of old context
+ umount_client $MOUNT
+ kdestroy
+ if is_mounted $MOUNT2; then
+ umount_client $MOUNT2
+ fi
+
+ # restore original krb5.conf
+ cp -f /etc/krb5.conf.bkp /etc/krb5.conf
+ rm -f /etc/krb5.conf.bkp
+
+ # remount client
+ mount_client $MOUNT ${MOUNT_OPTS} || error "mount $MOUNT failed"
+ if is_mounted $MOUNT2; then
+ mount_client $MOUNT2 ${MOUNT_OPTS} ||
+ error "mount $MOUNT2 failed"
+ fi
+}
+
+test_201() {
+ local nid=$(lctl list_nids | grep ${NETTYPE} | head -n1)
+ local nidstr="peer_nid: ${nid},"
+ local count
+
+ lfs df -h
+ $LFS mkdir -i 0 -c 1 $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ stack_trap cleanup_201 EXIT
+
+ # unmount to get rid of old context
+ umount_client $MOUNT || error "umount $MOUNT failed"
+ kdestroy
+ if is_mounted $MOUNT2; then
+ umount_client $MOUNT2 || error "umount $MOUNT2 failed"
+ fi
+
+ # update ticket lifetime to be 90s
+ sed -i.bkp 's+[^#]ticket_lifetime.*+ticket_lifetime = 90s+' \
+ /etc/krb5.conf
+ # establish new contexts
+ mount_client $MOUNT ${MOUNT_OPTS} || error "remount failed"
+ mount_client $MOUNT2 ${MOUNT_OPTS} || error "remount 2 failed"
+ lfs df -h
+
+ # take an ldlm lock on the first mount
+ touch $DIR/${tfile}_1
+ stack_trap "rm -f $DIR/${tfile}*" EXIT
+ # and make the second mount take it
+ touch $DIR2/$tdir/file001
+
+ # wait lifetime + 30s to have expired contexts
+ echo "Waiting 120s for gss contexts to expire..."
+ sleep 120
+
+ do_facet $SINGLEMDS $LCTL get_param -n \
+ mdt.*-MDT0000.gss.srpc_serverctx | grep "$nidstr"
+ count=$(do_facet $SINGLEMDS $LCTL get_param -n \
+ mdt.*-MDT0000.gss.srpc_serverctx | grep "$nidstr" |
+ grep -vc 'delta: -')
+ echo "found $count valid reverse contexts"
+ (( count == 0 )) || error "all contexts should have expired"
+
+ # make first mount reclaim ldlm lock
+ touch $DIR/${tfile}_2
+ $LFS df $MOUNT2
+ # this should not evict the second mount
+ client_evicted $HOSTNAME && error "client got evicted"
+
+ return 0
+}
+run_test 201 "allow expired ctx for ldlm callback"
+
complete_test $SECONDS
set_flavor_all null
cleanup_gss