From 2506fa2a42b7ae542fae9346a9cc8a6ae271f5de Mon Sep 17 00:00:00 2001 From: Mikhal Pershin Date: Fri, 3 Nov 2017 12:38:04 +0300 Subject: [PATCH] LU-11121 mdt: take discard lock at cleanup stage Call mdt_dom_check_and_discard() after mdt_object_unlock() to avoid possible deadlock if some third lock is conflicting with both like in the scenario below: thread1: mdt_object_lock() with some bits thread2: take conflicting lock and wait thread1: mdt_dom_check_and_discard() with bits conflicting with thread2 causes deadlock. Patch enables dom layout in racer to test it on regular basis Another minor update uses 'trap' in related tests. Test-Parameters: mdssizegb=20 mdtcount=1 mdscount=1 testlist=sanity-dom,dom-performance,racer,racer,racer Signed-off-by: Mikhail Pershin Change-Id: I63bedabb4a82cfa2f01e126d35dc8c2a89d64f56 Reviewed-on: https://review.whamcloud.com/29930 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- lustre/mdt/mdt_handler.c | 18 +++++++++--------- lustre/mdt/mdt_reint.c | 15 ++++++++++----- lustre/tests/racer.sh | 2 +- lustre/tests/sanityn.sh | 43 +++++++++++++++++++++---------------------- 4 files changed, 41 insertions(+), 37 deletions(-) diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index b0ad257..6550570 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -1840,7 +1840,7 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info, LDLM_LOCK_PUT(lock); mdt_object_put(info->mti_env, child); /* NB: call the mdt_pack_size2body always after - * mdt_object_put(), that is why this speacial + * mdt_object_put(), that is why this special * exit path is used. */ rc = mdt_pack_size2body(info, child_fid, &lhc->mlh_reg_lh); @@ -1854,17 +1854,17 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info, GOTO(out_parent, rc = 0); } - } - if (lock) - LDLM_LOCK_PUT(lock); + } + if (lock) + LDLM_LOCK_PUT(lock); - EXIT; + EXIT; out_child: - mdt_object_put(info->mti_env, child); + mdt_object_put(info->mti_env, child); out_parent: - if (lhp) - mdt_object_unlock(info, parent, lhp, 1); - return rc; + if (lhp) + mdt_object_unlock(info, parent, lhp, 1); + return rc; } /* normal handler: should release the child lock */ diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 4470fb0..5c451c0 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -847,7 +847,7 @@ static int mdt_reint_unlink(struct mdt_thread_info *info, struct mdt_lock_handle *child_lh; struct ldlm_enqueue_info *einfo = &info->mti_einfo; __u64 lock_ibits; - bool cos_incompat = false; + bool cos_incompat = false, discard = false; int no_name = 0; int rc; @@ -1029,7 +1029,7 @@ relock: if (rc) GOTO(out_stat, rc); } else { - mdt_dom_check_and_discard(info, mc); + discard = true; } mdt_handle_last_unlink(info, mc, ma); @@ -1058,6 +1058,8 @@ out_stat: unlock_child: mdt_reint_striped_unlock(info, mc, child_lh, einfo, rc); put_child: + if (discard) + mdt_dom_check_and_discard(info, mc); mdt_object_put(info->mti_env, mc); unlock_parent: mdt_object_unlock(info, mp, parent_lh, rc); @@ -1881,7 +1883,7 @@ static int mdt_reint_rename_internal(struct mdt_thread_info *info, struct lu_fid *new_fid = &info->mti_tmp_fid2; __u64 lock_ibits; bool reverse = false; - bool cos_incompat; + bool cos_incompat, discard = false; int rc; ENTRY; @@ -2150,7 +2152,7 @@ relock: mdt_counter_incr(req, LPROC_MDT_RENAME); if (mnew) { mdt_handle_last_unlink(info, mnew, ma); - mdt_dom_check_and_discard(info, mnew); + discard = true; } mdt_rename_counter_tally(info, info->mti_mdt, req, @@ -2163,8 +2165,11 @@ relock: out_unlock_old: mdt_object_unlock(info, mold, lh_oldp, rc); out_put_new: - if (mnew != NULL) + if (mnew != NULL) { + if (discard) + mdt_dom_check_and_discard(info, mnew); mdt_object_put(info->mti_env, mnew); + } out_put_old: mdt_object_put(info->mti_env, mold); out_unlock_parents: diff --git a/lustre/tests/racer.sh b/lustre/tests/racer.sh index ea70258..904ff71 100644 --- a/lustre/tests/racer.sh +++ b/lustre/tests/racer.sh @@ -53,7 +53,7 @@ RACER_ENABLE_STRIPED_DIRS=${RACER_ENABLE_STRIPED_DIRS:-false} RACER_ENABLE_MIGRATION=${RACER_ENABLE_MIGRATION:-false} RACER_ENABLE_SNAPSHOT=${RACER_ENABLE_SNAPSHOT:-true} RACER_ENABLE_PFL=${RACER_ENABLE_PFL:-true} -RACER_ENABLE_DOM=${RACER_ENABLE_DOM:-false} +RACER_ENABLE_DOM=${RACER_ENABLE_DOM:-true} RACER_ENABLE_FLR=${RACER_ENABLE_FLR:-true} check_progs_installed $CLIENTS $racer || diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 86367ae..b1b96de 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -4371,19 +4371,19 @@ test_101a() { # to get layout $CHECKSTAT -t file $DIR1/$tfile - OLD_VAL=$(cat /proc/sys/vm/dirty_writeback_centisecs) - echo 0 > /proc/sys/vm/dirty_writeback_centisecs - echo $OLD_VAL + local old_wb=$(sysctl -n vm.dirty_writeback_centisecs) + sysctl -wq vm.dirty_writeback_centisecs=0 + + trap "sysctl -wq vm.dirty_writeback_centisecs=$old_wb" EXIT # open + IO lock dd if=/dev/zero of=$DIR1/$tfile bs=4096 count=1 || error_noexit "Write fails" # must discard pages lctl set_param -n mdc.*.stats=clear - rm $DIR2/$tfile || error_noexit "Unlink fails" - local writes=$(lctl get_param -n mdc.*.stats | grep ost_write | wc -l) - echo $OLD_VAL > /proc/sys/vm/dirty_writeback_centisecs + rm $DIR2/$tfile || error "Unlink fails" + local writes=$(lctl get_param -n mdc.*.stats | grep ost_write | wc -l) [ $writes -eq 0 ] || error "Found WRITE RPC but expect none" } run_test 101a "Discard DoM data on unlink" @@ -4397,18 +4397,18 @@ test_101b() { # to get layout $CHECKSTAT -t file $DIR1/$tfile - OLD_VAL=$(cat /proc/sys/vm/dirty_writeback_centisecs) - echo 0 > /proc/sys/vm/dirty_writeback_centisecs - echo $OLD_VAL + local old_wb=$(sysctl -n vm.dirty_writeback_centisecs) + sysctl -wq vm.dirty_writeback_centisecs=0 + + trap "sysctl -wq vm.dirty_writeback_centisecs=$old_wb" EXIT # open + IO lock - dd if=/dev/zero of=$DIR1/$tfile bs=4096 count=1 || - error_noexit "Write fails" + dd if=/dev/zero of=$DIR1/$tfile bs=4096 count=1 || error "Write fails" # must discard pages lctl set_param -n mdc.*.stats=clear - mv $DIR2/${tfile}_2 $DIR2/$tfile || error_noexit "Rename fails" + mv $DIR2/${tfile}_2 $DIR2/$tfile || error "Rename fails" + local writes=$(lctl get_param -n mdc.*.stats | grep ost_write | wc -l) - echo $OLD_VAL > /proc/sys/vm/dirty_writeback_centisecs [ $writes -eq 0 ] || error "Found WRITE RPC but expect none" } run_test 101b "Discard DoM data on rename" @@ -4421,22 +4421,21 @@ test_101c() { # to get layout $CHECKSTAT -t file $DIR1/$tfile - OLD_VAL=$(cat /proc/sys/vm/dirty_writeback_centisecs) - echo 0 > /proc/sys/vm/dirty_writeback_centisecs - echo $OLD_VAL + local old_wb=$(sysctl -n vm.dirty_writeback_centisecs) + sysctl -wq vm.dirty_writeback_centisecs=0 + + trap "sysctl -wq vm.dirty_writeback_centisecs=$old_wb" EXIT # open + IO lock - dd if=/dev/zero of=$DIR1/$tfile bs=4096 count=1 || - error_noexit "Write fails" + dd if=/dev/zero of=$DIR1/$tfile bs=4096 count=1 || error "Write fails" $MULTIOP $DIR1/$tfile O_c & MULTIOP_PID=$! sleep 1 lctl set_param -n mdc.*.stats=clear - rm $DIR2/$tfile > /dev/null || error_noexit "Unlink fails" - kill -USR1 $MULTIOP_PID && wait $MULTIOP_PID || - error_noexit "multiop failure" + rm $DIR2/$tfile > /dev/null || error "Unlink fails for opened file" + kill -USR1 $MULTIOP_PID && wait $MULTIOP_PID || error "multiop failure" + local writes=$(lctl get_param -n mdc.*.stats | grep ost_write | wc -l) - echo $OLD_VAL > /proc/sys/vm/dirty_writeback_centisecs [ $writes -eq 0 ] || error "Found WRITE RPC but expect none" } run_test 101c "Discard DoM data on close-unlink" -- 1.8.3.1