Whamcloud - gitweb
LU-18214 ldlm: change flock deadlock detection 84/56984/5
author: Yang Sheng <ys@whamcloud.com>
Thu, 14 Nov 2024 20:54:22 +0000 (12:54 -0800)
committer: Oleg Drokin <green@whamcloud.com>
Wed, 22 Jan 2025 18:51:38 +0000 (18:51 +0000)
The flock deadlock detection code treated the case where the
request lock is the same as the blocking lock as a bug. In
fact, this indicates a cycle in the blocking chain, so it
should be treated as a deadlock. Also clean up the reprocess
code.

Lustre-change: https://review.whamcloud.com/56319
Lustre-commit: c2e6fa41aac428222770a4fc2826567a74a6dbc6
Signed-off-by: Yang Sheng <ys@whamcloud.com>
Change-Id: Icf0df4ac281c2cdb6cc57cb79db137d39ecef9e6
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56984
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Jian Yu <yujian@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Mikhail Pershin <mpershin@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/ldlm/ldlm_flock.c

index 745c1ea..e447378 100644 (file)
@@ -212,14 +212,11 @@ ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
 
                bl_exp_conn = bl_exp->exp_connection;
                if (bl_exp->exp_flock_hash != NULL) {
-                       int found;
-
-                       found = obd_nid_export_for_each(bl_exp->exp_obd,
-                                                       &bl_exp_conn->c_peer.nid,
-                                                       ldlm_flock_lookup_cb,
-                                                       &cb_data);
-                       if (found)
-                               lock = cb_data.lock;
+                       obd_nid_export_for_each(bl_exp->exp_obd,
+                                               &bl_exp_conn->c_peer.nid,
+                                               ldlm_flock_lookup_cb,
+                                               &cb_data);
+                       lock = cb_data.lock;
                }
                if (lock == NULL)
                        break;
@@ -227,7 +224,6 @@ ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
                class_export_put(bl_exp);
                bl_exp = cb_data.exp;
 
-               LASSERT(req != lock);
                flock = &lock->l_policy_data.l_flock;
                LASSERT(flock->owner == bl_owner);
                bl_owner = flock->blocking_owner;
@@ -237,12 +233,16 @@ ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
                cfs_hash_put(bl_exp->exp_flock_hash, &lock->l_exp_flock_hash);
                bl_exp = bl_exp_new;
 
+               if (req == lock) {
+                       class_export_put(bl_exp);
+                       return 1;
+               }
                if (bl_exp->exp_failed)
                        break;
 
                if (bl_owner == req_owner &&
                    nid_same(&bl_exp_conn->c_peer.nid,
-                             &req_exp->exp_connection->c_peer.nid)) {
+                            &req_exp->exp_connection->c_peer.nid)) {
                        class_export_put(bl_exp);
                        return 1;
                }