From 127128bed3dc7a69645087aaa50d6c342987366a Mon Sep 17 00:00:00 2001
From: Sebastien Buisson
Date: Thu, 14 Sep 2023 18:00:04 +0200
Subject: [PATCH] LU-16498 obdclass: change uc_lock to rwlock

Change the upcall cache uc_lock to a read-write lock so that threads
can take the read lock to do concurrent lookups in the upcall cache,
and only grab the write lock in the rare case when a new entry is
added or old entries are expired. That reduces serialization between
server threads during normal operation, and avoids all of the threads
spinning for some time before they sleep when the requested key (UID
or gss context) is not in the cache at all.

Lustre-change: https://review.whamcloud.com/52395
Lustre-commit: TBD (from 003615a0a6711334d95c42f3c41852e1cbc8e77b)

Test-Parameters: kerberos=true testlist=sanity-krb5
Signed-off-by: Sebastien Buisson
Change-Id: I812400104fd2115d19386fb4a03bb3ce99c49383
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53622
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
---
 lustre/include/upcall_cache.h           |   2 +-
 lustre/obdclass/upcall_cache.c          | 123 ++++++++++++++++++++++----------
 lustre/obdclass/upcall_cache_internal.c |  14 ++--
 3 files changed, 95 insertions(+), 44 deletions(-)

diff --git a/lustre/include/upcall_cache.h b/lustre/include/upcall_cache.h
index 6b4cbc2..3567f84 100644
--- a/lustre/include/upcall_cache.h
+++ b/lustre/include/upcall_cache.h
@@ -147,7 +147,7 @@ struct upcall_cache_ops {
 struct upcall_cache {
 	struct list_head	*uc_hashtable;
 	int			uc_hashsize;
-	spinlock_t		uc_lock;
+	rwlock_t		uc_lock;
 	struct rw_semaphore	uc_upcall_rwsem;
 
 	char			uc_name[40];	/* for upcall */
diff --git a/lustre/obdclass/upcall_cache.c b/lustre/obdclass/upcall_cache.c
index 5e7f2dd..3058d3c 100644
--- a/lustre/obdclass/upcall_cache.c
+++ b/lustre/obdclass/upcall_cache.c
@@ -87,8 +87,18 @@ static inline int downcall_compare(struct upcall_cache *cache,
 	return 0;
 }
 
+static inline void write_lock_from_read(rwlock_t *lock, bool *writelock)
+{
+	if (!*writelock) {
+		read_unlock(lock);
+		write_lock(lock);
+		*writelock = true;
+	}
+}
+
 static int check_unlink_entry(struct upcall_cache *cache,
-			      struct upcall_cache_entry *entry)
+			      struct upcall_cache_entry *entry,
+			      bool writelock)
 {
 	time64_t now = ktime_get_seconds();
 
@@ -100,15 +110,19 @@ static int check_unlink_entry(struct upcall_cache *cache,
 		    now < entry->ue_acquire_expire)
 			return 0;
 
-		UC_CACHE_SET_EXPIRED(entry);
-		wake_up_all(&entry->ue_waitq);
-	} else if (!UC_CACHE_IS_INVALID(entry)) {
+		if (writelock) {
+			UC_CACHE_SET_EXPIRED(entry);
+			wake_up(&entry->ue_waitq);
+		}
+	} else if (!UC_CACHE_IS_INVALID(entry) && writelock) {
 		UC_CACHE_SET_EXPIRED(entry);
 	}
 
-	list_del_init(&entry->ue_hash);
-	if (!atomic_read(&entry->ue_refcount))
-		free_entry(cache, entry);
+	if (writelock) {
+		list_del_init(&entry->ue_hash);
+		if (!atomic_read(&entry->ue_refcount))
+			free_entry(cache, entry);
+	}
 	return 1;
 }
 
@@ -133,7 +147,9 @@ struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache,
 	bool failedacquiring = false;
 	struct list_head *head;
 	wait_queue_entry_t wait;
+	bool writelock;
 	int rc, found, ngroups = 0;
+
 	ENTRY;
 
 	LASSERT(cache);
@@ -142,10 +158,17 @@ struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache,
 		cache->uc_hashsize)];
 find_again:
 	found = 0;
-	spin_lock(&cache->uc_lock);
+	if (new) {
+		write_lock(&cache->uc_lock);
+		writelock = true;
+	} else {
+		read_lock(&cache->uc_lock);
+		writelock = false;
+	}
+find_with_lock:
 	list_for_each_entry_safe(entry, next, head, ue_hash) {
 		/* check invalid & expired items */
-		if (check_unlink_entry(cache, entry))
+		if (check_unlink_entry(cache, entry, writelock))
 			continue;
 		if (upcall_compare(cache, entry, key, args) == 0) {
 			found = 1;
@@ -155,10 +178,14 @@ find_again:
 
 	if (!found) {
 		if (!new) {
-			spin_unlock(&cache->uc_lock);
+			if (writelock)
+				write_unlock(&cache->uc_lock);
+			else
+				read_unlock(&cache->uc_lock);
 			new = alloc_entry(cache, key, args);
 			if (!new) {
-				CERROR("fail to alloc entry\n");
+				CERROR("%s: fail to alloc entry: rc = %d\n",
+				       cache->uc_name, -ENOMEM);
 				RETURN(ERR_PTR(-ENOMEM));
 			}
 			goto find_again;
@@ -170,17 +197,25 @@ find_again:
 		if (new) {
 			free_entry(cache, new);
 			new = NULL;
+		} else if (!writelock) {
+			/* We found an entry while holding the read lock, so
+			 * convert it to a write lock and find again, to check
+			 * that entry was not modified/freed in between.
+			 */
+			write_lock_from_read(&cache->uc_lock, &writelock);
+			goto find_with_lock;
 		}
 		list_move(&entry->ue_hash, head);
 	}
 
+	/* now we hold a write lock */
 	get_entry(entry);
 	/* special processing of supp groups for identity upcall */
 	if (strcmp(cache->uc_upcall, IDENTITY_UPCALL_INTERNAL) == 0) {
-		spin_unlock(&cache->uc_lock);
+		write_unlock(&cache->uc_lock);
 		rc = upcall_cache_get_entry_internal(cache, entry, args,
 						&fsgid, &grouplist, &ngroups);
-		spin_lock(&cache->uc_lock);
+		write_lock(&cache->uc_lock);
 		if (rc)
 			GOTO(out, entry = ERR_PTR(rc));
 	}
@@ -189,9 +224,9 @@ find_again:
 	if (UC_CACHE_IS_NEW(entry)) {
 		UC_CACHE_SET_ACQUIRING(entry);
 		UC_CACHE_CLEAR_NEW(entry);
-		spin_unlock(&cache->uc_lock);
+		write_unlock(&cache->uc_lock);
 		rc = refresh_entry(cache, entry, fsgid, ngroups, grouplist);
-		spin_lock(&cache->uc_lock);
+		write_lock(&cache->uc_lock);
 		entry->ue_acquire_expire = ktime_get_seconds() +
 			cache->uc_acquire_expire;
 		if (rc < 0) {
@@ -215,11 +250,11 @@ find_again:
 		init_wait(&wait);
 		add_wait_queue(&entry->ue_waitq, &wait);
 		set_current_state(TASK_INTERRUPTIBLE);
-		spin_unlock(&cache->uc_lock);
+		write_unlock(&cache->uc_lock);
 
 		left = schedule_timeout(expiry);
 
-		spin_lock(&cache->uc_lock);
+		write_lock(&cache->uc_lock);
 		remove_wait_queue(&entry->ue_waitq, &wait);
 		if (UC_CACHE_IS_ACQUIRING(entry)) {
 			/* we're interrupted or upcall failed in the middle */
@@ -231,7 +266,7 @@ find_again:
 			failedacquiring = true;
 			put_entry(cache, entry);
 			if (!failedacquiring) {
-				spin_unlock(&cache->uc_lock);
+				write_unlock(&cache->uc_lock);
 				failedacquiring = true;
 				new = NULL;
 				CDEBUG(D_OTHER,
@@ -239,8 +274,9 @@ find_again:
 				       entry->ue_key, rc);
 				goto find_again;
 			}
-			CERROR("acquire for key %llu: error %d\n",
-			       entry->ue_key, rc);
+			CERROR("%s: acquire for key %lld after %llu: rc = %d\n",
+			       cache->uc_name, entry->ue_key,
+			       cache->uc_acquire_expire, rc);
 			GOTO(out, entry = ERR_PTR(rc));
 		}
 	}
@@ -255,15 +291,16 @@ find_again:
 		 * We can't refresh the existing one because some
 		 * memory might be shared by multiple processes.
 		 */
-		if (check_unlink_entry(cache, entry)) {
+		if (check_unlink_entry(cache, entry, writelock)) {
 			/* if expired, try again. but if this entry is
 			 * created by me but too quickly turn to expired
			 * without any error, should at least give a
 			 * chance to use it once.
 			 */
 			if (entry != new) {
+				/* as stated above, we already hold a write lock */
 				put_entry(cache, entry);
-				spin_unlock(&cache->uc_lock);
+				write_unlock(&cache->uc_lock);
 				new = NULL;
 				goto find_again;
 			}
@@ -271,7 +308,10 @@ find_again:
 
 	/* Now we know it's good */
 out:
-	spin_unlock(&cache->uc_lock);
+	if (writelock)
+		write_unlock(&cache->uc_lock);
+	else
+		read_unlock(&cache->uc_lock);
 	if (grouplist)
 		CFS_FREE_PTR_ARRAY(grouplist, ngroups);
 	RETURN(entry);
@@ -288,13 +328,13 @@ void upcall_cache_update_entry(struct upcall_cache *cache,
 			       struct upcall_cache_entry *entry,
 			       time64_t expire, int state)
 {
-	spin_lock(&cache->uc_lock);
+	write_lock(&cache->uc_lock);
 	entry->ue_expire = expire;
 	if (!state)
 		UC_CACHE_SET_VALID(entry);
 	else
 		entry->ue_flags |= state;
-	spin_unlock(&cache->uc_lock);
+	write_unlock(&cache->uc_lock);
 }
 EXPORT_SYMBOL(upcall_cache_update_entry);
 
@@ -309,9 +349,9 @@ void upcall_cache_put_entry(struct upcall_cache *cache,
 	}
 
 	LASSERT(atomic_read(&entry->ue_refcount) > 0);
-	spin_lock(&cache->uc_lock);
+	write_lock(&cache->uc_lock);
 	put_entry(cache, entry);
-	spin_unlock(&cache->uc_lock);
+	write_unlock(&cache->uc_lock);
 	EXIT;
 }
 EXPORT_SYMBOL(upcall_cache_put_entry);
@@ -322,6 +362,7 @@ int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key,
 	struct upcall_cache_entry *entry = NULL;
 	struct list_head *head;
 	int found = 0, rc = 0;
+	bool writelock = false;
 	ENTRY;
 
 	LASSERT(cache);
@@ -329,7 +370,7 @@ int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key,
 	head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key,
 							cache->uc_hashsize)];
 
-	spin_lock(&cache->uc_lock);
+	read_lock(&cache->uc_lock);
 	list_for_each_entry(entry, head, ue_hash) {
 		if (downcall_compare(cache, entry, key, args) == 0) {
 			found = 1;
@@ -342,32 +383,35 @@ int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key,
 		CDEBUG(D_OTHER, "%s: upcall for key %llu not expected\n",
 		       cache->uc_name, key);
 		/* haven't found, it's possible */
-		spin_unlock(&cache->uc_lock);
+		read_unlock(&cache->uc_lock);
 		RETURN(-EINVAL);
 	}
 
 	if (err) {
 		CDEBUG(D_OTHER, "%s: upcall for key %llu returned %d\n",
 		       cache->uc_name, entry->ue_key, err);
+		write_lock_from_read(&cache->uc_lock, &writelock);
 		GOTO(out, rc = err);
 	}
 
 	if (!UC_CACHE_IS_ACQUIRING(entry)) {
 		CDEBUG(D_RPCTRACE, "%s: found uptodate entry %p (key %llu)"
 		       "\n", cache->uc_name, entry, entry->ue_key);
+		write_lock_from_read(&cache->uc_lock, &writelock);
 		GOTO(out, rc = 0);
 	}
 
 	if (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry)) {
 		CERROR("%s: found a stale entry %p (key %llu) in ioctl\n",
 		       cache->uc_name, entry, entry->ue_key);
+		write_lock_from_read(&cache->uc_lock, &writelock);
 		GOTO(out, rc = -EINVAL);
 	}
 
-	spin_unlock(&cache->uc_lock);
+	read_unlock(&cache->uc_lock);
 
 	if (cache->uc_ops->parse_downcall)
 		rc = cache->uc_ops->parse_downcall(cache, entry, args);
-	spin_lock(&cache->uc_lock);
+	write_lock(&cache->uc_lock);
 	if (rc)
 		GOTO(out, rc);
@@ -377,14 +421,15 @@ int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key,
 	CDEBUG(D_OTHER, "%s: created upcall cache entry %p for key %llu\n",
 	       cache->uc_name, entry, entry->ue_key);
 out:
+	/* 'goto out' needs to make sure to take a write lock first */
 	if (rc) {
 		UC_CACHE_SET_INVALID(entry);
 		list_del_init(&entry->ue_hash);
 	}
 	UC_CACHE_CLEAR_ACQUIRING(entry);
-	spin_unlock(&cache->uc_lock);
 	wake_up_all(&entry->ue_waitq);
 	put_entry(cache, entry);
+	write_unlock(&cache->uc_lock);
 
 	RETURN(rc);
 }
@@ -396,7 +441,7 @@ void upcall_cache_flush(struct upcall_cache *cache, int force)
 	int i;
 	ENTRY;
 
-	spin_lock(&cache->uc_lock);
+	write_lock(&cache->uc_lock);
 	for (i = 0; i < cache->uc_hashsize; i++) {
 		list_for_each_entry_safe(entry, next,
 					 &cache->uc_hashtable[i], ue_hash) {
@@ -408,7 +453,7 @@ void upcall_cache_flush(struct upcall_cache *cache, int force)
 				free_entry(cache, entry);
 		}
 	}
-	spin_unlock(&cache->uc_lock);
+	write_unlock(&cache->uc_lock);
 	EXIT;
 }
 EXPORT_SYMBOL(upcall_cache_flush);
@@ -423,7 +468,7 @@ void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args)
 	head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key,
 							cache->uc_hashsize)];
 
-	spin_lock(&cache->uc_lock);
+	write_lock(&cache->uc_lock);
 	list_for_each_entry(entry, head, ue_hash) {
 		if (upcall_compare(cache, entry, key, args) == 0) {
 			found = 1;
@@ -438,11 +483,11 @@ void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args)
 		      atomic_read(&entry->ue_refcount), entry->ue_flags,
 		      ktime_get_real_seconds(), entry->ue_acquire_expire,
 		      entry->ue_expire);
+		get_entry(entry);
 		UC_CACHE_SET_EXPIRED(entry);
-		if (!atomic_read(&entry->ue_refcount))
-			free_entry(cache, entry);
+		put_entry(cache, entry);
 	}
-	spin_unlock(&cache->uc_lock);
+	write_unlock(&cache->uc_lock);
 }
 EXPORT_SYMBOL(upcall_cache_flush_one);
 
@@ -459,7 +504,7 @@ struct upcall_cache *upcall_cache_init(const char *name, const char *upcall,
 	if (!cache)
 		RETURN(ERR_PTR(-ENOMEM));
 
-	spin_lock_init(&cache->uc_lock);
+	rwlock_init(&cache->uc_lock);
 	init_rwsem(&cache->uc_upcall_rwsem);
 	cache->uc_hashsize = hashsz;
 	LIBCFS_ALLOC(cache->uc_hashtable,
diff --git a/lustre/obdclass/upcall_cache_internal.c b/lustre/obdclass/upcall_cache_internal.c
index d999a0c..ca78932 100644
--- a/lustre/obdclass/upcall_cache_internal.c
+++ b/lustre/obdclass/upcall_cache_internal.c
@@ -52,7 +52,10 @@ inline int refresh_entry_internal(struct upcall_cache *cache,
 		lustre_groups_sort(ginfo);
 	}
 
-	spin_lock(&cache->uc_lock);
+	/* we are called from upcall_cache_get_entry() after
+	 * write lock has been dropped
+	 */
+	write_lock(&cache->uc_lock);
 	get_entry(entry);
 	entry->u.identity.mi_uid = entry->ue_key;
 	entry->u.identity.mi_gid = fsgid;
@@ -62,7 +65,7 @@ inline int refresh_entry_internal(struct upcall_cache *cache,
 	entry->ue_expire = ktime_get_seconds() + cache->uc_entry_expire;
 	UC_CACHE_SET_VALID(entry);
 	UC_CACHE_CLEAR_ACQUIRING(entry);
-	spin_unlock(&cache->uc_lock);
+	write_unlock(&cache->uc_lock);
 	put_entry(cache, entry);
 
 	CDEBUG(D_OTHER,
@@ -142,9 +145,12 @@ int upcall_cache_get_entry_internal(struct upcall_cache *cache,
 			/* force refresh as an existing cache entry
 			 * cannot be modified
 			 */
-			spin_lock(&cache->uc_lock);
+			/* we are called from upcall_cache_get_entry() after
+			 * write lock has been dropped
+			 */
+			write_lock(&cache->uc_lock);
 			entry->ue_expire = ktime_get_seconds();
-			spin_unlock(&cache->uc_lock);
+			write_unlock(&cache->uc_lock);
 		}
 	}
-- 
1.8.3.1
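
The lock-upgrade pattern that write_lock_from_read() introduces above (look up
under the read lock, upgrade to the write lock only when the entry actually has
to be modified, and then re-check the entry because the lock was fully dropped
in between) can be sketched in plain userspace C with POSIX rwlocks. This is
only an illustrative sketch of the pattern, not Lustre code: the names below
(struct ucache, struct uentry, ucache_lookup, find) are hypothetical.

    /* Userspace sketch of the read-to-write lock upgrade pattern.
     * Build with: cc -c sketch.c -pthread
     */
    #include <pthread.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct uentry {
            unsigned long long key;
            struct uentry *next;
    };

    struct ucache {
            pthread_rwlock_t lock;   /* plays the role of uc_lock */
            struct uentry *head;     /* single hash chain, for brevity */
    };

    /* Upgrade from the read lock to the write lock exactly once.
     * The lock is fully dropped in between, so the caller must
     * re-validate anything it looked up under the read lock.
     */
    static void write_lock_from_read(pthread_rwlock_t *lock, bool *writelock)
    {
            if (!*writelock) {
                    pthread_rwlock_unlock(lock);
                    pthread_rwlock_wrlock(lock);
                    *writelock = true;
            }
    }

    /* Walk the chain; the caller must hold the lock in either mode. */
    static struct uentry *find(struct ucache *c, unsigned long long key)
    {
            struct uentry *e;

            for (e = c->head; e != NULL; e = e->next)
                    if (e->key == key)
                            return e;
            return NULL;
    }

    struct uentry *ucache_lookup(struct ucache *c, unsigned long long key)
    {
            bool writelock = false;
            struct uentry *e;

            pthread_rwlock_rdlock(&c->lock); /* common case: concurrent readers */
            e = find(c, key);
            if (e != NULL) {
                    /* Modifying the entry or the chain needs the write lock.
                     * Repeat the lookup after upgrading: another thread may
                     * have removed the entry while no lock was held.
                     */
                    write_lock_from_read(&c->lock, &writelock);
                    e = find(c, key);
            }
            pthread_rwlock_unlock(&c->lock); /* releases read or write lock */
            return e;
    }

In the kernel patch above the same upgrade is done with read_unlock()/
write_lock() on the rwlock_t uc_lock, and the find_with_lock: label plays the
role of the repeated find() after the upgrade.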