From 6a073773939cd515055dfb85e522001121261feb Mon Sep 17 00:00:00 2001
From: Andriy Skulysh
Date: Wed, 12 Dec 2012 15:53:52 +0200
Subject: [PATCH] LU-1601 ldlm: Fix flock detection for different mounts

A deadlock can occur when two processes take conflicting locks on
files located under different mount points. Modify the flock deadlock
detection algorithm to identify a lock owner by the pair PID+NID
instead of PID+export. This is done by searching for the blocking
owner in all of the OBD's exports that share the same NID.

Xyratex-bug-id: MRP-449
Signed-off-by: Andriy Skulysh
Reviewed-by: Vitaly Fertman
Reviewed-by: Bruce Korb
Change-Id: Ib2969df9b9733af4025e1905caf2378af72c6f18
Reviewed-on: http://review.whamcloud.com/3276
Reviewed-by: Keith Mannthey
Tested-by: Hudson
Tested-by: Maloo
Reviewed-by: Andreas Dilger
Reviewed-by: Oleg Drokin
---
 lustre/ldlm/ldlm_flock.c   | 45 ++++++++++++++++++++++---
 lustre/tests/flocks_test.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++
 lustre/tests/sanityn.sh    |  7 ++++
 3 files changed, 131 insertions(+), 4 deletions(-)

diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c
index 2fb4a99..f5e63fe 100644
--- a/lustre/ldlm/ldlm_flock.c
+++ b/lustre/ldlm/ldlm_flock.c
@@ -169,6 +169,31 @@ ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, __u64 flags)
  * one client holds a lock on something and want a lock on something
  * else and at the same time another client has the opposite situation).
  */
+
+struct ldlm_flock_lookup_cb_data {
+	__u64 *bl_owner;
+	struct ldlm_lock *lock;
+	struct obd_export *exp;
+};
+
+static int ldlm_flock_lookup_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+				cfs_hlist_node_t *hnode, void *data)
+{
+	struct ldlm_flock_lookup_cb_data *cb_data = data;
+	struct obd_export *exp = cfs_hash_object(hs, hnode);
+	struct ldlm_lock *lock;
+
+	lock = cfs_hash_lookup(exp->exp_flock_hash, cb_data->bl_owner);
+	if (lock == NULL)
+		return 0;
+
+	/* Stop on first found lock. Same process can't sleep twice */
+	cb_data->lock = lock;
+	cb_data->exp = class_export_get(exp);
+
+	return 1;
+}
+
 static int
 ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
 {
@@ -183,16 +208,26 @@ ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
 	class_export_get(bl_exp);
 
 	while (1) {
+		struct ldlm_flock_lookup_cb_data cb_data = {
+			.bl_owner = &bl_owner,
+			.lock = NULL,
+			.exp = NULL };
 		struct obd_export *bl_exp_new;
 		struct ldlm_lock *lock = NULL;
 		struct ldlm_flock *flock;
 
-		if (bl_exp->exp_flock_hash != NULL)
-			lock = cfs_hash_lookup(bl_exp->exp_flock_hash,
-					       &bl_owner);
+		if (bl_exp->exp_flock_hash != NULL) {
+			cfs_hash_for_each_key(bl_exp->exp_obd->obd_nid_hash,
+				&bl_exp->exp_connection->c_peer.nid,
+				ldlm_flock_lookup_cb, &cb_data);
+			lock = cb_data.lock;
+		}
+
 		if (lock == NULL)
 			break;
 
+		class_export_put(bl_exp);
+		bl_exp = cb_data.exp;
+
 		LASSERT(req != lock);
 		flock = &lock->l_policy_data.l_flock;
 		LASSERT(flock->owner == bl_owner);
@@ -206,7 +241,9 @@ ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
 		if (bl_exp->exp_failed)
 			break;
 
-		if (bl_owner == req_owner && bl_exp == req_exp) {
+		if (bl_owner == req_owner &&
+		    (bl_exp->exp_connection->c_peer.nid ==
+		     req_exp->exp_connection->c_peer.nid)) {
 			class_export_put(bl_exp);
 			return 1;
 		}
diff --git a/lustre/tests/flocks_test.c b/lustre/tests/flocks_test.c
index 82f7cfd..ab38fa0 100644
--- a/lustre/tests/flocks_test.c
+++ b/lustre/tests/flocks_test.c
@@ -357,6 +357,86 @@ out:
 	return rc;
 }
 
+int t4(int argc, char *argv[])
+{
+	struct flock lock = {
+		.l_type = F_WRLCK,
+		.l_whence = SEEK_SET,
+		.l_start = 0,
+		.l_len = 10,
+	};
+	int fd, fd2;
+	int pid;
+	int rc = EXIT_SUCCESS;
+
+	if (argc != 4) {
+		fprintf(stderr, "Usage: ./flocks_test 4 file1 file2\n");
+		return EXIT_FAILURE;
+	}
+
+	if ((fd = open(argv[2], O_RDWR)) < 0) {
+		fprintf(stderr, "Couldn't open file: %s\n", argv[2]);
+		return EXIT_FAILURE;
+	}
+	if ((fd2 = open(argv[3], O_RDWR)) < 0) {
+		fprintf(stderr, "Couldn't open file: %s\n", argv[3]);
+		rc = EXIT_FAILURE;
+		goto out;
+	}
+
+	pid = fork();
+	if (pid == -1) {
+		perror("fork");
+		rc = EXIT_FAILURE;
+		goto out;
+	}
+
+	if (pid == 0) {
+		printf("%d: get lock1\n", getpid());
+		fflush(stdout);
+		if (t_fcntl(fd, F_SETLKW, &lock) < 0) {
+			perror("first flock failed");
+			rc = EXIT_FAILURE;
+			goto out_child;
+		}
+		printf("%d: done\n", getpid());
+		sleep(3);
+		printf("%d: get lock2\n", getpid());
+		fflush(stdout);
+		if (t_fcntl(fd2, F_SETLKW, &lock) < 0) {
+			perror("second flock failed");
+			rc = EXIT_FAILURE;
+			goto out_child;
+		}
+		printf("%d: done\n", getpid());
+out_child:
+		printf("%d: exit rc=%d\n", getpid(), rc);
+		exit(rc);
+	} else {
+		printf("%d: get lock2\n", getpid());
+		fflush(stdout);
+		if (t_fcntl(fd2, F_SETLKW, &lock) < 0) {
+			perror("first flock failed");
+			rc = EXIT_FAILURE;
+			goto out;
+		}
+		printf("%d: done\n", getpid());
+		sleep(3);
+		printf("%d: get lock1\n", getpid());
+		fflush(stdout);
+		if (t_fcntl(fd, F_SETLKW, &lock) < 0) {
+			perror("second flock failed");
+			rc = EXIT_FAILURE;
+			goto out;
+		}
+		printf("%d: done\n", getpid());
+	}
+
+out:
+	printf("%d: exit rc=%d\n", getpid(), rc);
+	return rc;
+}
+
 
 /** ==============================================================
  * program entry
@@ -387,6 +467,9 @@ int main(int argc, char* argv[])
 	case 3:
 		rc = t3(argc, argv);
 		break;
+	case 4:
+		rc = t4(argc, argv);
+		break;
 	default:
 		fprintf(stderr, "unknow test number %s\n", argv[1]);
 		break;
diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh
index 45d5dc9..63321ae 100644
--- a/lustre/tests/sanityn.sh
+++ b/lustre/tests/sanityn.sh
@@ -2525,6 +2525,13 @@ test_73() {
 }
 run_test 73 "getxattr should not cause xattr lock cancellation"
 
+test_74() {
+	dd if=/dev/zero of=$DIR1/$tfile-1 bs=1K count=1
+	dd if=/dev/zero of=$DIR1/$tfile-2 bs=1K count=1
+	flocks_test 4 $DIR1/$tfile-1 $DIR2/$tfile-2
+}
+run_test 74 "flock deadlock: different mounts =============="
+
 
 log "cleanup: ======================================================"
 [ "$(mount | grep $MOUNT2)" ] && umount $MOUNT2
-- 
1.8.3.1
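
Note on the core of the change: the deadlock detector previously treated
each (PID, export) pair as a distinct lock owner, so a client node that
mounts the filesystem twice appears as two owners and a wait cycle that
spans both mounts is never closed. Keying owners on (PID, NID) collapses
all exports coming from one node into a single owner, at the cost of
scanning every export hashed under the blocking NID (the
cfs_hash_for_each_key() walk above) rather than doing one per-export
lookup. Below is a minimal stand-alone sketch of just that comparison;
the struct layouts, type names, and values are illustrative stand-ins,
not the actual Lustre definitions:

/*
 * Illustrative sketch only, not the patch's actual data structures.
 * "lnet_nid_t" here is a local stand-in for the LNet NID type.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t lnet_nid_t;

struct flock_owner_old {
	uint64_t pid;		/* PID of the locking process */
	const void *exp;	/* per-mount export: differs for each mount */
};

struct flock_owner_new {
	uint64_t pid;		/* PID of the locking process */
	lnet_nid_t nid;		/* client NID: shared by all of a node's mounts */
};

/* Old rule: two mounts on the same node look like two different owners. */
static bool same_owner_old(const struct flock_owner_old *a,
			   const struct flock_owner_old *b)
{
	return a->pid == b->pid && a->exp == b->exp;
}

/* New rule: every export with the same NID belongs to one owner. */
static bool same_owner_new(const struct flock_owner_new *a,
			   const struct flock_owner_new *b)
{
	return a->pid == b->pid && a->nid == b->nid;
}

int main(void)
{
	/* One process (pid 4242) seen through two mounts of one client. */
	struct flock_owner_old o1 = { .pid = 4242, .exp = (void *)0x1 };
	struct flock_owner_old o2 = { .pid = 4242, .exp = (void *)0x2 };
	struct flock_owner_new n1 = { .pid = 4242, .nid = 0x50f1 };
	struct flock_owner_new n2 = { .pid = 4242, .nid = 0x50f1 };

	printf("old rule sees one owner: %d\n", same_owner_old(&o1, &o2));
	printf("new rule sees one owner: %d\n", same_owner_new(&n1, &n2));
	return 0;
}

Under the old rule the two mounts compare as different owners and the
cycle exercised by test_74 goes undetected, leaving both processes
blocked; under the new rule ldlm_flock_deadlock() closes the cycle and
the second F_SETLKW request fails with EDEADLK instead of waiting
forever.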