Whamcloud - gitweb
LU-11102 ldlm: run local lock bl_ast only when necessary 38/32738/9
authorLai Siyao <lai.siyao@intel.com>
Wed, 20 Jun 2018 18:30:18 +0000 (02:30 +0800)
committerOleg Drokin <green@whamcloud.com>
Wed, 1 Aug 2018 14:25:22 +0000 (14:25 +0000)
LDLM local lock will be canceled after use, and it should only
run bl_ast if it needs to trigger Commit-on-Sharing, otherwise
if this bl_ast does nothing, it will prevent subsequent
operations to run bl_ast again, therefore Commit-on-Sharing
can't be triggered.

For example, a concurrent setattr on a striped directory and
rename under this directory:
1. setattr takes UPDATE lock of directory, but not unlock it yet
(i.e., this lock is not downgraded to COS lock).
2. a concurrent 'mv' under this directory will first getattr file by
name, this getattr will take UPDATE lock of this directory, which is
racing with setattr, but this getattr is not a distributed operation,
and the lock still has writer (by setattr), bl_ast does nothing.
3. setattr unlocks this UPDATE lock.
4. rename tries to lock UPDATE lock of this directory, but this lock
was bl_ast was run before(though nothing did), it won't run again,
rename will wait until setattr transaction commit.

To fix this, run local lock bl_ast only when it will trigger
Commit-on-Sharing.

Add sanity.sh test_415 to verify this.

Signed-off-by: Lai Siyao <lai.siyao@intel.com>
Change-Id: Idae241076e7cae8fe06ae6a34481fe19c7dfd2f3
Reviewed-on: https://review.whamcloud.com/32738
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ldlm/ldlm_inodebits.c
lustre/tests/sanity.sh

index c5d5d5a..d6ededf 100644 (file)
 #include "ldlm_internal.h"
 
 #ifdef HAVE_SERVER_SUPPORT
+/*
+ * local lock will be canceled after use, and it should run blocking ast only
+ * when it should trigger Commit-on-Sharing, otherwise if the blocking ast
+ * is run and does nothing, it will prevent subsequent operations to trigger
+ * Commit-on-Sharing, see LU-11102.
+ */
+static bool ldlm_should_run_bl_ast(const struct ldlm_lock *lock,
+                                  const struct ldlm_lock *req)
+{
+       /* no blocking ast */
+       if (!lock->l_blocking_ast)
+               return false;
+
+       /* not local lock */
+       if (!ldlm_is_local(lock))
+               return true;
+
+       /* should trigger Commit-on-Sharing */
+       if ((lock->l_req_mode & LCK_COS))
+               return true;
+
+       /* local read lock will be canceld after use */
+       if (!(lock->l_req_mode & (LCK_PW | LCK_EX)))
+               return false;
+
+       /* if CoS enabled, check if @req is from different client */
+       if (ldlm_is_cos_enabled(req))
+               return lock->l_client_cookie != req->l_client_cookie;
+
+       /* check if @req is COS incompatible */
+       if (ldlm_is_cos_incompat(req))
+               return true;
+
+       return false;
+}
+
 /**
  * Determine if the lock is compatible with all locks on the queue.
  *
@@ -168,12 +204,12 @@ ldlm_inodebits_compat_queue(struct list_head *queue, struct ldlm_lock *req,
 
                                /* Add locks of the policy group to @work_list
                                 * as blocking locks for @req */
-                               if (lock->l_blocking_ast)
+                               if (ldlm_should_run_bl_ast(lock, req))
                                        ldlm_add_ast_work_item(lock, req,
                                                               work_list);
                                head = &lock->l_sl_policy;
                                list_for_each_entry(lock, head, l_sl_policy)
-                                       if (lock->l_blocking_ast)
+                                       if (ldlm_should_run_bl_ast(lock, req))
                                                ldlm_add_ast_work_item(lock,
                                                                req, work_list);
                        }
index c3ba48d..e94393a 100755 (executable)
@@ -18641,6 +18641,47 @@ test_414() {
 }
 run_test 414 "simulate ENOMEM in ptlrpc_register_bulk()"
 
+test_415() {
+       [ $PARALLEL == "yes" ] && skip "skip parallel run"
+       [ $(lustre_version_code mds1) -lt $(version_code 2.11.52) ] &&
+               skip "Need server version at least 2.11.52"
+
+       # LU-11102
+       local total
+       local setattr_pid
+       local start_time
+       local end_time
+       local duration
+
+       total=500
+
+       # though this test is designed for striped directory, let's test normal
+       # directory too since lock is always saved as CoS lock.
+       test_mkdir $DIR/$tdir || error "mkdir $tdir"
+       createmany -o $DIR/$tdir/$tfile. $total || error "createmany"
+
+       (
+               while true; do
+                       touch $DIR/$tdir
+               done
+       ) &
+       setattr_pid=$!
+
+       start_time=$(date +%s)
+       for i in $(seq $total); do
+               mrename $DIR/$tdir/$tfile.$i $DIR/$tdir/$tfile-new.$i \
+                       > /dev/null
+       done
+       end_time=$(date +%s)
+       duration=$((end_time - start_time))
+
+       kill -9 $setattr_pid
+
+       echo "rename $total files took $duration sec"
+       [ $duration -lt 100 ] || error "rename took $duration sec"
+}
+run_test 415 "lock revoke is not missing"
+
 prep_801() {
        [[ $(lustre_version_code mds1) -lt $(version_code 2.9.55) ]] ||
        [[ $(lustre_version_code ost1) -lt $(version_code 2.9.55) ]] &&