Whamcloud - gitweb
LU-18483 ldlm: serialize parallel lock res change requests 10/57110/2
authorAndrew Perepechko <andrew.perepechko@hpe.com>
Fri, 22 Nov 2024 17:05:36 +0000 (20:05 +0300)
committerOleg Drokin <green@whamcloud.com>
Thu, 2 Jan 2025 20:42:10 +0000 (20:42 +0000)
If parallel ldlm_lock_change_resource() calls race in a certain
way, the old resource can be released multiple times leading to
memory corruption.

The fix detects such race condtions after locking and restarts
the replace procedure if needed.

Change-Id: I742a8dde0ab4c63517c02c2e8e1bdfe402b331b4
Signed-off-by: Andrew Perepechko <andrew.perepechko@hpe.com>
HPE-bug-id: LUS-12212
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57110
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexander Zarochentsev <alexander.zarochentsev@hpe.com>
Reviewed-by: Yang Sheng <ys@whamcloud.com>
Reviewed-by: Patrick Farrell <pfarrell@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ldlm/ldlm_lock.c

index 06d0c3e..001bc1a 100644 (file)
@@ -548,23 +548,30 @@ int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
         * To flip the lock from the old to the new resource, oldres
         * and newres have to be locked. Resource spin-locks are taken
         * in the memory address order to avoid dead-locks.
-        * As this is the only circumstance where ->l_resource
-        * can change, and this cannot race with itself, it is safe
-        * to access lock->l_resource without being careful about locking.
         */
-       oldres = lock->l_resource;
-       if (oldres < newres) {
-               lock_res(oldres);
-               lock_res_nested(newres, LRT_NEW);
-       } else {
-               lock_res(newres);
-               lock_res_nested(oldres, LRT_NEW);
-       }
-       LASSERT(memcmp(new_resid, &oldres->lr_name,
-                      sizeof(oldres->lr_name)) != 0);
-       rcu_assign_pointer(lock->l_resource, newres);
-       unlock_res(oldres);
-       unlock_res(newres);
+       rcu_read_lock();
+       while (1) {
+               oldres = rcu_dereference(lock->l_resource);
+               if (oldres == newres)
+                       break;
+
+               if (oldres < newres) {
+                       lock_res(oldres);
+                       lock_res_nested(newres, LRT_NEW);
+               } else {
+                       lock_res(newres);
+                       lock_res_nested(oldres, LRT_NEW);
+               }
+               if (lock->l_resource == oldres) {
+                       rcu_assign_pointer(lock->l_resource, newres);
+                       unlock_res(oldres);
+                       unlock_res(newres);
+                       break;
+               }
+               unlock_res(oldres);
+               unlock_res(newres);
+       }
+       rcu_read_unlock();
 
        /* ...and the flowers are still standing! */
        ldlm_resource_putref(oldres);