From 954cc6754b19a5eb4b9f717f79037c40baa87f3f Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Thu, 21 Jun 2018 02:30:18 +0800 Subject: [PATCH] LU-11102 ldlm: run local lock bl_ast only when necessary LDLM local lock will be canceled after use, and it should only run bl_ast if it needs to trigger Commit-on-Sharing, otherwise if this bl_ast does nothing, it will prevent subsequent operations to run bl_ast again, therefore Commit-on-Sharing can't be triggered. For example, a concurrent setattr on a striped directory and rename under this directory: 1. setattr takes UPDATE lock of directory, but not unlock it yet (i.e., this lock is not downgraded to COS lock). 2. a concurrent 'mv' under this directory will first getattr file by name, this getattr will take UPDATE lock of this directory, which is racing with setattr, but this getattr is not a distributed operation, and the lock still has writer (by setattr), bl_ast does nothing. 3. setattr unlocks this UPDATE lock. 4. rename tries to lock UPDATE lock of this directory, but this lock was bl_ast was run before(though nothing did), it won't run again, rename will wait until setattr transaction commit. To fix this, run local lock bl_ast only when it will trigger Commit-on-Sharing. Add sanity.sh test_415 to verify this. Signed-off-by: Lai Siyao Change-Id: Idae241076e7cae8fe06ae6a34481fe19c7dfd2f3 Reviewed-on: https://review.whamcloud.com/32738 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- lustre/ldlm/ldlm_inodebits.c | 40 ++++++++++++++++++++++++++++++++++++++-- lustre/tests/sanity.sh | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/lustre/ldlm/ldlm_inodebits.c b/lustre/ldlm/ldlm_inodebits.c index c5d5d5a..d6ededf 100644 --- a/lustre/ldlm/ldlm_inodebits.c +++ b/lustre/ldlm/ldlm_inodebits.c @@ -57,6 +57,42 @@ #include "ldlm_internal.h" #ifdef HAVE_SERVER_SUPPORT +/* + * local lock will be canceled after use, and it should run blocking ast only + * when it should trigger Commit-on-Sharing, otherwise if the blocking ast + * is run and does nothing, it will prevent subsequent operations to trigger + * Commit-on-Sharing, see LU-11102. + */ +static bool ldlm_should_run_bl_ast(const struct ldlm_lock *lock, + const struct ldlm_lock *req) +{ + /* no blocking ast */ + if (!lock->l_blocking_ast) + return false; + + /* not local lock */ + if (!ldlm_is_local(lock)) + return true; + + /* should trigger Commit-on-Sharing */ + if ((lock->l_req_mode & LCK_COS)) + return true; + + /* local read lock will be canceld after use */ + if (!(lock->l_req_mode & (LCK_PW | LCK_EX))) + return false; + + /* if CoS enabled, check if @req is from different client */ + if (ldlm_is_cos_enabled(req)) + return lock->l_client_cookie != req->l_client_cookie; + + /* check if @req is COS incompatible */ + if (ldlm_is_cos_incompat(req)) + return true; + + return false; +} + /** * Determine if the lock is compatible with all locks on the queue. * @@ -168,12 +204,12 @@ ldlm_inodebits_compat_queue(struct list_head *queue, struct ldlm_lock *req, /* Add locks of the policy group to @work_list * as blocking locks for @req */ - if (lock->l_blocking_ast) + if (ldlm_should_run_bl_ast(lock, req)) ldlm_add_ast_work_item(lock, req, work_list); head = &lock->l_sl_policy; list_for_each_entry(lock, head, l_sl_policy) - if (lock->l_blocking_ast) + if (ldlm_should_run_bl_ast(lock, req)) ldlm_add_ast_work_item(lock, req, work_list); } diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index c3ba48d..e94393a 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -18641,6 +18641,47 @@ test_414() { } run_test 414 "simulate ENOMEM in ptlrpc_register_bulk()" +test_415() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + [ $(lustre_version_code mds1) -lt $(version_code 2.11.52) ] && + skip "Need server version at least 2.11.52" + + # LU-11102 + local total + local setattr_pid + local start_time + local end_time + local duration + + total=500 + + # though this test is designed for striped directory, let's test normal + # directory too since lock is always saved as CoS lock. + test_mkdir $DIR/$tdir || error "mkdir $tdir" + createmany -o $DIR/$tdir/$tfile. $total || error "createmany" + + ( + while true; do + touch $DIR/$tdir + done + ) & + setattr_pid=$! + + start_time=$(date +%s) + for i in $(seq $total); do + mrename $DIR/$tdir/$tfile.$i $DIR/$tdir/$tfile-new.$i \ + > /dev/null + done + end_time=$(date +%s) + duration=$((end_time - start_time)) + + kill -9 $setattr_pid + + echo "rename $total files took $duration sec" + [ $duration -lt 100 ] || error "rename took $duration sec" +} +run_test 415 "lock revoke is not missing" + prep_801() { [[ $(lustre_version_code mds1) -lt $(version_code 2.9.55) ]] || [[ $(lustre_version_code ost1) -lt $(version_code 2.9.55) ]] && -- 1.8.3.1