From: NeilBrown Date: Mon, 22 Jul 2019 03:04:11 +0000 (-0400) Subject: LU-12565 obdecho: use bit-locking in echo_client. X-Git-Tag: 2.12.57~16 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=a0ed8ac61e199993afe28f58801fa0182e69a2f8 LU-12565 obdecho: use bit-locking in echo_client. The ep_lock used by echo client causes lockdep to complain. Multiple locks of the same class are taken concurrently which appear to lockdep to be prone to deadlocking, and can fill up lockdep's fixed size stack for locks. As ep_lock is taken on multiple pages always in ascending page order, deadlocks don't happen, so this is a false-positive. The function of the ep_lock is the same as thats for page_lock(), which is implemented as a bit-lock using wait_on_bit(). lockdep cannot see these locks, and doesn't really need to. So convert ep_lock to a simple bit-lock using wait_on_bit for waiting. This provides similar functionality, matches how page_lock() works, and avoids lockdep problems. Linux-commit: f017f3ff7eb704ea1fc125a90a39b693ee84bd0a Change-Id: I97050e1c88ee27ca4e05b4b39a65e1850f42534b Signed-off-by: NeilBrown Reviewed-on: https://review.whamcloud.com/35563 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index ee6167a..5262121 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -94,7 +94,7 @@ struct echo_object_conf { struct echo_page { struct cl_page_slice ep_cl; - struct mutex ep_lock; + unsigned long ep_lock; }; struct echo_lock { @@ -288,10 +288,13 @@ static int echo_page_own(const struct lu_env *env, { struct echo_page *ep = cl2echo_page(slice); - if (!nonblock) - mutex_lock(&ep->ep_lock); - else if (!mutex_trylock(&ep->ep_lock)) - return -EAGAIN; + if (!nonblock) { + if (test_and_set_bit(0, &ep->ep_lock)) + return -EAGAIN; + } else { + while (test_and_set_bit(0, &ep->ep_lock)) + wait_on_bit(&ep->ep_lock, 0, TASK_UNINTERRUPTIBLE); + } return 0; } @@ -301,8 +304,8 @@ static void echo_page_disown(const struct lu_env *env, { struct echo_page *ep = cl2echo_page(slice); - LASSERT(mutex_is_locked(&ep->ep_lock)); - mutex_unlock(&ep->ep_lock); + LASSERT(test_bit(0, &ep->ep_lock)); + clear_and_wake_up_bit(0, &ep->ep_lock); } static void echo_page_discard(const struct lu_env *env, @@ -315,7 +318,7 @@ static void echo_page_discard(const struct lu_env *env, static int echo_page_is_vmlocked(const struct lu_env *env, const struct cl_page_slice *slice) { - if (mutex_is_locked(&cl2echo_page(slice)->ep_lock)) + if (test_bit(0, &cl2echo_page(slice)->ep_lock)) return -EBUSY; return -ENODATA; } @@ -353,7 +356,7 @@ static int echo_page_print(const struct lu_env *env, struct echo_page *ep = cl2echo_page(slice); (*printer)(env, cookie, LUSTRE_ECHO_CLIENT_NAME"-page@%p %d vm@%p\n", - ep, mutex_is_locked(&ep->ep_lock), + ep, test_bit(0, &ep->ep_lock), slice->cpl_page->cp_vmpage); return 0; } @@ -414,7 +417,13 @@ static int echo_page_init(const struct lu_env *env, struct cl_object *obj, ENTRY; get_page(page->cp_vmpage); - mutex_init(&ep->ep_lock); + /* + * ep_lock is similar to the lock_page() lock, and + * cannot usefully be monitored by lockdep. + * So just use a bit in an "unsigned long" and use the + * wait_on_bit() interface to wait for the bit to be clear. + */ + ep->ep_lock = 0; cl_page_slice_add(page, &ep->ep_cl, obj, index, &echo_page_ops); atomic_inc(&eco->eo_npages); RETURN(0);