Whamcloud - gitweb
LU-1601 ldlm: Fix flock detection for different mounts 76/3276/12
authorAndriy Skulysh <Andriy_Skulysh@xyratex.com>
Wed, 12 Dec 2012 13:53:52 +0000 (15:53 +0200)
committerOleg Drokin <oleg.drokin@intel.com>
Wed, 18 Sep 2013 14:33:16 +0000 (14:33 +0000)
Deadlock can happen when 2 processes take concurrent locks
on files situated in different mountpoints.

Modify flock detection algorithm to distinguish process by
pair PID+NID instead of PID+export.
It is done by searching for a blocking owner in all OBD's
exports with the same NID.

Xyratex-bug-id: MRP-449
Signed-off-by: Andriy Skulysh <Andriy_Skulysh@xyratex.com>
Reviewed-by: Vitaly Fertman <vitaly_fertman@xyratex.com>
Reviewed-by: Bruce Korb <bruce_korb@xyratex.com>
Change-Id: Ib2969df9b9733af4025e1905caf2378af72c6f18
Reviewed-on: http://review.whamcloud.com/3276
Reviewed-by: Keith Mannthey <keith.mannthey@intel.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/ldlm/ldlm_flock.c
lustre/tests/flocks_test.c
lustre/tests/sanityn.sh

index 2fb4a99..f5e63fe 100644 (file)
@@ -169,6 +169,31 @@ ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, __u64 flags)
  * one client holds a lock on something and want a lock on something
  * else and at the same time another client has the opposite situation).
  */
+
+struct ldlm_flock_lookup_cb_data {
+       __u64 *bl_owner;
+       struct ldlm_lock *lock;
+       struct obd_export *exp;
+};
+
+static int ldlm_flock_lookup_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+                               cfs_hlist_node_t *hnode, void *data)
+{
+       struct ldlm_flock_lookup_cb_data *cb_data = data;
+       struct obd_export *exp = cfs_hash_object(hs, hnode);
+       struct ldlm_lock *lock;
+
+       lock = cfs_hash_lookup(exp->exp_flock_hash, cb_data->bl_owner);
+       if (lock == NULL)
+               return 0;
+
+       /* Stop on first found lock. Same process can't sleep twice */
+       cb_data->lock = lock;
+       cb_data->exp = class_export_get(exp);
+
+       return 1;
+}
+
 static int
 ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
 {
@@ -183,16 +208,26 @@ ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
 
         class_export_get(bl_exp);
        while (1) {
+               struct ldlm_flock_lookup_cb_data cb_data = {
+                                       .bl_owner = &bl_owner,
+                                       .lock = NULL,
+                                       .exp = NULL };
                struct obd_export *bl_exp_new;
                struct ldlm_lock *lock = NULL;
                struct ldlm_flock *flock;
 
-               if (bl_exp->exp_flock_hash != NULL)
-                       lock = cfs_hash_lookup(bl_exp->exp_flock_hash,
-                                              &bl_owner);
+               if (bl_exp->exp_flock_hash != NULL) {
+                       cfs_hash_for_each_key(bl_exp->exp_obd->obd_nid_hash,
+                               &bl_exp->exp_connection->c_peer.nid,
+                               ldlm_flock_lookup_cb, &cb_data);
+                       lock = cb_data.lock;
+               }
                if (lock == NULL)
                        break;
 
+               class_export_put(bl_exp);
+               bl_exp = cb_data.exp;
+
                LASSERT(req != lock);
                flock = &lock->l_policy_data.l_flock;
                LASSERT(flock->owner == bl_owner);
@@ -206,7 +241,9 @@ ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
                if (bl_exp->exp_failed)
                        break;
 
-                if (bl_owner == req_owner && bl_exp == req_exp) {
+               if (bl_owner == req_owner &&
+                   (bl_exp->exp_connection->c_peer.nid ==
+                    req_exp->exp_connection->c_peer.nid)) {
                         class_export_put(bl_exp);
                         return 1;
                 }
index 82f7cfd..ab38fa0 100644 (file)
@@ -357,6 +357,86 @@ out:
         return rc;
 }
 
+int t4(int argc, char *argv[])
+{
+       struct flock lock = {
+               .l_type = F_WRLCK,
+               .l_whence = SEEK_SET,
+               .l_start = 0,
+               .l_len = 10,
+       };
+
+       int fd, fd2;
+       int pid;
+       int rc = EXIT_SUCCESS;
+
+       if (argc != 4) {
+               fprintf(stderr, "Usage: ./flocks_test 4 file1 file2\n");
+               return EXIT_FAILURE;
+       }
+
+       if ((fd = open(argv[2], O_RDWR)) < 0) {
+               fprintf(stderr, "Couldn't open file: %s\n", argv[2]);
+               return EXIT_FAILURE;
+       }
+       if ((fd2 = open(argv[3], O_RDWR)) < 0) {
+               fprintf(stderr, "Couldn't open file: %s\n", argv[3]);
+               rc = EXIT_FAILURE;
+               goto out;
+       }
+
+       pid = fork();
+       if (pid == -1) {
+               perror("fork");
+               rc = EXIT_FAILURE;
+               goto out;
+       }
+
+       if (pid == 0) {
+               printf("%d: get lock1\n", getpid());
+               fflush(stdout);
+               if (t_fcntl(fd, F_SETLKW, &lock) < 0) {
+                       perror("first flock failed");
+                       rc = EXIT_FAILURE;
+                       goto out_child;
+               }
+               printf("%d: done\n", getpid());
+               sleep(3);
+               printf("%d: get lock2\n", getpid());
+               fflush(stdout);
+               if (t_fcntl(fd2, F_SETLKW, &lock) < 0) {
+                       perror("first flock failed");
+                       rc = EXIT_FAILURE;
+                       goto out_child;
+               }
+               printf("%d: done\n", getpid());
+out_child:
+               printf("%d: exit rc=%d\n", getpid(), rc);
+               exit(rc);
+       } else {
+               printf("%d: get lock2\n", getpid());
+               fflush(stdout);
+               if (t_fcntl(fd2, F_SETLKW, &lock) < 0) {
+                       perror("first flock failed");
+                       rc = EXIT_FAILURE;
+                       goto out;
+               }
+               printf("%d: done\n", getpid());
+               sleep(3);
+               printf("%d: get lock1\n", getpid());
+               fflush(stdout);
+               if (t_fcntl(fd, F_SETLKW, &lock) < 0) {
+                       perror("first flock failed");
+                       rc = EXIT_FAILURE;
+                       goto out;
+               }
+               printf("%d: done\n", getpid());
+       }
+
+out:
+       printf("%d: exit rc=%d\n", getpid(), rc);
+       return rc;
+}
 
 /** ==============================================================
  * program entry
@@ -387,6 +467,9 @@ int main(int argc, char* argv[])
         case 3:
                 rc = t3(argc, argv);
                 break;
+       case 4:
+               rc = t4(argc, argv);
+               break;
         default:
                 fprintf(stderr, "unknow test number %s\n", argv[1]);
                 break;
index 45d5dc9..63321ae 100644 (file)
@@ -2525,6 +2525,13 @@ test_73() {
 }
 run_test 73 "getxattr should not cause xattr lock cancellation"
 
+test_74() {
+       dd if=/dev/zero of=$DIR1/$tfile-1 bs=1K count=1
+       dd if=/dev/zero of=$DIR1/$tfile-2 bs=1K count=1
+       flocks_test 4 $DIR1/$tfile-1 $DIR2/$tfile-2
+}
+run_test 74 "flock deadlock: different mounts =============="
+
 log "cleanup: ======================================================"
 
 [ "$(mount | grep $MOUNT2)" ] && umount $MOUNT2