From 0136a91b6d629556ef091f5ca210c13772207df9 Mon Sep 17 00:00:00 2001 From: Wang Di Date: Wed, 22 Oct 2014 22:50:10 -0700 Subject: [PATCH] LU-3534 update: change sync updates to async updates 1. change sync updates to async updates. 2. add tests (110,111,112,115) in replay-single to test async update recovery. 3. Add replay-barrier for those old DNE replay tests, so all MDTs can be recovered correctly. 4. enable DNE failover for the dbench test (replay-single.sh 70b). Change-Id: Ibe1ca42d60c7a0ec870b9cf4a70a71adc663b951 Signed-off-by: Wang Di Reviewed-on: http://review.whamcloud.com/12450 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- lustre/include/lustre_export.h | 1 - lustre/mdt/mdt_handler.c | 5 - lustre/osp/osp_trans.c | 7 +- lustre/target/tgt_lastrcvd.c | 7 +- lustre/target/update_trans.c | 2 - lustre/tests/conf-sanity.sh | 8 +- lustre/tests/replay-single.sh | 698 ++++++++++++++++++++++++++++++++++++++++- lustre/tests/sanity.sh | 2 +- 8 files changed, 708 insertions(+), 22 deletions(-) diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index d9af664..ce40d58 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -237,7 +237,6 @@ struct obd_export { exp_req_replay_needed:1, exp_lock_replay_needed:1, exp_need_sync:1, - exp_keep_sync:1, exp_flvr_changed:1, exp_flvr_adapt:1, exp_libclient:1, /* liblustre client? */ diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 1b50be9..1bb8a0e 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -5031,11 +5031,6 @@ static int mdt_obd_connect(const struct lu_env *env, mdt_export_stats_init(obd, lexp, localdata); } - - /* For phase I, sync for cross-ref operation. 
*/ - spin_lock(&lexp->exp_lock); - lexp->exp_keep_sync = 1; - spin_unlock(&lexp->exp_lock); } out: if (rc != 0) { diff --git a/lustre/osp/osp_trans.c b/lustre/osp/osp_trans.c index 6d0067a..c1d1950 100644 --- a/lustre/osp/osp_trans.c +++ b/lustre/osp/osp_trans.c @@ -545,11 +545,12 @@ int osp_trans_update_request_create(struct thandle *th) return PTR_ERR(our); } - if (dt2osp_dev(th->th_dev)->opd_connect_mdt) - our->our_flags = UPDATE_FL_SYNC; - oth->ot_our = our; our->our_th = oth; + + if (oth->ot_super.th_sync) + oth->ot_our->our_flags |= UPDATE_FL_SYNC; + return 0; } diff --git a/lustre/target/tgt_lastrcvd.c b/lustre/target/tgt_lastrcvd.c index d1f74aa..1aa891f 100644 --- a/lustre/target/tgt_lastrcvd.c +++ b/lustre/target/tgt_lastrcvd.c @@ -492,11 +492,8 @@ static void tgt_cb_new_client(struct lu_env *env, struct thandle *th, ccb->lncc_exp->exp_client_uuid.uuid); spin_lock(&ccb->lncc_exp->exp_lock); - /* XXX: Currently, we use per-export based sync/async policy for - * the update via OUT RPC, it is coarse-grained policy, and - * will be changed as per-request based by DNE II patches. 
*/ - if (!ccb->lncc_exp->exp_keep_sync) - ccb->lncc_exp->exp_need_sync = 0; + + ccb->lncc_exp->exp_need_sync = 0; spin_unlock(&ccb->lncc_exp->exp_lock); class_export_cb_put(ccb->lncc_exp); diff --git a/lustre/target/update_trans.c b/lustre/target/update_trans.c index 0d2d995..28bf6be 100644 --- a/lustre/target/update_trans.c +++ b/lustre/target/update_trans.c @@ -1019,8 +1019,6 @@ struct thandle *thandle_get_sub_by_dt(const struct lu_env *env, top_th->tt_master_sub_thandle); if (IS_ERR(st)) GOTO(stop_trans, rc = PTR_ERR(st)); - top_th->tt_master_sub_thandle->th_sync = 1; - top_th->tt_super.th_sync = 1; } /* create and init sub th to the top trans list */ diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 811d99e..3c453f1 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -1643,6 +1643,7 @@ t32_test() { trap 'trap - RETURN; t32_test_cleanup' RETURN + load_modules mkdir -p $tmp/mnt/lustre || error "mkdir $tmp/mnt/lustre failed" $r mkdir -p $tmp/mnt/{mdt,ost} $r tar xjvf $tarball -S -C $tmp || { @@ -1893,11 +1894,13 @@ t32_test() { } if [ "$dne_upgrade" != "no" ]; then - $LFS mkdir -i 1 $tmp/mnt/lustre/remote_dir || { + $LFS mkdir -i 1 -c2 $tmp/mnt/lustre/remote_dir || { error_noexit "set remote dir failed" return 1 } + $LFS setdirstripe -D -c2 $tmp/mnt/lustre/remote_dir + pushd $tmp/mnt/lustre tar -cf - . --exclude=./remote_dir | tar -xvf - -C remote_dir 1>/dev/null || { @@ -2090,6 +2093,9 @@ test_32c() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return t32_check for tarball in $tarballs; do + # Direct upgrade from 1_8 and 2_1 to DNE2 is no longer supported + echo $tarball | grep "1_8" && continue + echo $tarball | grep "2_1" && continue dne_upgrade=yes t32_test $tarball writeconf || rc=$? 
done return $rc diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index d697853..cb52a4b 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -2048,6 +2048,10 @@ test_70b () { local start_ts=$(date +%s) local cmd="rundbench 1 -t $duration" local pid="" + if [ $MDSCOUNT -ge 2 ]; then + test_mkdir -p -c$MDSCOUNT $DIR/$tdir + $LFS setdirstripe -D -c$MDSCOUNT $DIR/$tdir + fi do_nodesv $clients "set -x; MISSING_DBENCH_OK=$MISSING_DBENCH_OK \ PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests/:$DBENCH_LIB \ DBENCH_LIB=$DBENCH_LIB TESTSUITE=$TESTSUITE TESTNAME=$TESTNAME \ @@ -2068,6 +2072,7 @@ test_70b () { elapsed=$(($(date +%s) - start_ts)) local num_failovers=0 + local fail_index=1 while [ $elapsed -lt $duration ]; do if ! check_for_process $clients dbench; then error_noexit "dbench stopped on some of $clients!" @@ -2075,18 +2080,23 @@ test_70b () { break fi sleep 1 - replay_barrier $SINGLEMDS + replay_barrier mds$fail_index sleep 1 # give clients a time to do operations # Increment the number of failovers num_failovers=$((num_failovers+1)) - log "$TESTNAME fail $SINGLEMDS $num_failovers times" - fail $SINGLEMDS + log "$TESTNAME fail mds$fail_index $num_failovers times" + fail mds$fail_index elapsed=$(($(date +%s) - start_ts)) + if [ $fail_index -ge $MDSCOUNT ]; then + fail_index=1 + else + fail_index=$((fail_index+1)) + fi done wait $pid || error "rundbench load on $clients failed!" } -run_test 70b "mds recovery; $CLIENTCOUNT clients" +run_test 70b "dbench ${MDSCOUNT}mdts recovery; $CLIENTCOUNT clients" # end multi-client tests test_73a() { @@ -2172,6 +2182,7 @@ test_80a() { $LFS mkdir -i $MDTIDX $remote_dir & local CLIENT_PID=$! + replay_barrier mds1 fail mds${MDTIDX} wait $CLIENT_PID || error "remote creation failed" @@ -2199,6 +2210,8 @@ test_80b() { $LFS mkdir -i $MDTIDX $remote_dir & local CLIENT_PID=$! 
+ replay_barrier mds1 + replay_barrier mds2 fail mds$((MDTIDX + 1)) wait $CLIENT_PID || error "remote creation failed" @@ -2226,6 +2239,8 @@ test_80c() { $LFS mkdir -i $MDTIDX $remote_dir & local CLIENT_PID=$! + replay_barrier mds1 + replay_barrier mds2 fail mds${MDTIDX} fail mds$((MDTIDX + 1)) @@ -2252,6 +2267,9 @@ test_80d() { # sleep 3 seconds to make sure MDTs are failed after # lfs mkdir -i has finished on all of MDTs. sleep 3 + + replay_barrier mds1 + replay_barrier mds2 fail mds${MDTIDX},mds$((MDTIDX + 1)) wait $CLIENT_PID || error "remote creation failed" @@ -2283,6 +2301,7 @@ test_80e() { # lfs mkdir -i has finished on all of MDTs. sleep 3 + replay_barrier mds1 fail mds${MDTIDX} wait $CLIENT_PID || error "remote creation failed" @@ -2309,6 +2328,7 @@ test_80f() { $LFS mkdir -i $MDTIDX $remote_dir & local CLIENT_PID=$! + replay_barrier mds2 fail mds$((MDTIDX + 1)) wait $CLIENT_PID || error "remote creation failed" @@ -2340,6 +2360,8 @@ test_80g() { # lfs mkdir -i has finished on all of MDTs. sleep 3 + replay_barrier mds1 + replay_barrier mds2 fail mds${MDTIDX} fail mds$((MDTIDX + 1)) @@ -2367,6 +2389,8 @@ test_80h() { # lfs mkdir -i has finished on all of MDTs. sleep 3 + replay_barrier mds1 + replay_barrier mds2 fail mds${MDTIDX},mds$((MDTIDX + 1)) wait $CLIENT_PID || error "remote dir creation failed" @@ -2397,6 +2421,7 @@ test_81a() { rmdir $remote_dir & local CLIENT_PID=$! + replay_barrier mds2 fail mds$((MDTIDX + 1)) wait $CLIENT_PID || error "rm remote dir failed" @@ -2426,6 +2451,7 @@ test_81b() { rmdir $remote_dir & local CLIENT_PID=$! + replay_barrier mds1 fail mds${MDTIDX} wait $CLIENT_PID || error "rm remote dir failed" @@ -2456,6 +2482,8 @@ test_81c() { rmdir $remote_dir & local CLIENT_PID=$! + replay_barrier mds1 + replay_barrier mds2 fail mds${MDTIDX} fail mds$((MDTIDX + 1)) @@ -2482,6 +2510,8 @@ test_81d() { rmdir $remote_dir & local CLIENT_PID=$! 
+ replay_barrier mds1 + replay_barrier mds2 fail mds${MDTIDX},mds$((MDTIDX + 1)) wait $CLIENT_PID || error "rm remote dir failed" @@ -2513,6 +2543,7 @@ test_81e() { local CLIENT_PID=$! do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0 + replay_barrier mds1 fail mds${MDTIDX} wait $CLIENT_PID || error "rm remote dir failed" @@ -2543,6 +2574,7 @@ test_81f() { rmdir $remote_dir & local CLIENT_PID=$! + replay_barrier mds2 fail mds$((MDTIDX + 1)) wait $CLIENT_PID || error "rm remote dir failed" @@ -2573,6 +2605,8 @@ test_81g() { rmdir $remote_dir & local CLIENT_PID=$! + replay_barrier mds1 + replay_barrier mds2 fail mds${MDTIDX} fail mds$((MDTIDX + 1)) @@ -2599,6 +2633,8 @@ test_81h() { rmdir $remote_dir & local CLIENT_PID=$! + replay_barrier mds1 + replay_barrier mds2 fail mds${MDTIDX},mds$((MDTIDX + 1)) wait $CLIENT_PID || error "rm remote dir failed" @@ -3059,6 +3095,660 @@ test_101() { #LU-5648 } run_test 101 "Shouldn't reassign precreated objs to other files after recovery" +check_striped_dir_110() +{ + $CHECKSTAT -t dir $DIR/$tdir/striped_dir || + error "create striped dir failed" + local stripe_count=$($LFS getdirstripe -c $DIR/$tdir/striped_dir) + [ $stripe_count -eq $MDSCOUNT ] || + error "$stripe_count != 2 after recovery" +} + +test_110a() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + mkdir -p $DIR/$tdir + replay_barrier mds1 + $LFS mkdir -i1 -c$MDSCOUNT $DIR/$tdir/striped_dir + fail mds1 + + check_striped_dir_110 || error "check striped_dir failed" + rm -rf $DIR/$tdir || error "rmdir failed" + + return 0 +} +run_test 110a "DNE: create striped dir, fail MDT1" + +test_110b() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail 
mode" && + return 0 + + mkdir -p $DIR/$tdir + replay_barrier mds1 + $LFS mkdir -i1 -c$MDSCOUNT $DIR/$tdir/striped_dir + umount $MOUNT + fail mds1 + zconf_mount $(hostname) $MOUNT + client_up || return 1 + + check_striped_dir_110 || error "check striped_dir failed" + + rm -rf $DIR/$tdir || error "rmdir failed" + + return 0 +} +run_test 110b "DNE: create striped dir, fail MDT1 and client" + +test_110c() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + mkdir -p $DIR/$tdir + replay_barrier mds2 + $LFS mkdir -i1 -c$MDSCOUNT $DIR/$tdir/striped_dir + fail mds2 + + check_striped_dir_110 || error "check striped_dir failed" + + rm -rf $DIR/$tdir || error "rmdir failed" + + return 0 +} +run_test 110c "DNE: create striped dir, fail MDT2" + +test_110d() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + mkdir -p $DIR/$tdir + replay_barrier mds2 + $LFS mkdir -i1 -c$MDSCOUNT $DIR/$tdir/striped_dir + umount $MOUNT + fail mds2 + zconf_mount $(hostname) $MOUNT + client_up || return 1 + + check_striped_dir_110 || error "check striped_dir failed" + + rm -rf $DIR/$tdir || error "rmdir failed" + + return 0 +} +run_test 110d "DNE: create striped dir, fail MDT2 and client" + +test_110e() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + mkdir -p $DIR/$tdir + replay_barrier mds2 + $LFS mkdir -i1 -c$MDSCOUNT $DIR/$tdir/striped_dir + umount $MOUNT + replay_barrier mds1 + fail mds1,mds2 + zconf_mount $(hostname) $MOUNT + client_up || return 1 + + check_striped_dir_110 
|| error "check striped_dir failed" + + rm -rf $DIR/$tdir || error "rmdir failed" + + return 0 +} +run_test 110e "DNE: create striped dir, uncommit on MDT2, fail client/MDT1/MDT2" + +test_110f() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + mkdir -p $DIR/$tdir + replay_barrier mds1 + replay_barrier mds2 + $LFS mkdir -i1 -c$MDSCOUNT $DIR/$tdir/striped_dir + fail mds1,mds2 + + check_striped_dir_110 || error "check striped_dir failed" + + rm -rf $DIR/$tdir || error "rmdir failed" + + return 0 +} +run_test 110f "DNE: create striped dir, fail MDT1/MDT2" + +test_110g() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + mkdir -p $DIR/$tdir + replay_barrier mds1 + $LFS mkdir -i1 -c$MDSCOUNT $DIR/$tdir/striped_dir + umount $MOUNT + replay_barrier mds2 + fail mds1,mds2 + zconf_mount $(hostname) $MOUNT + client_up || return 1 + + check_striped_dir_110 || error "check striped_dir failed" + + rm -rf $DIR/$tdir || error "rmdir failed" + + return 0 +} +run_test 110g "DNE: create striped dir, uncommit on MDT1, fail client/MDT1/MDT2" + +test_111a() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + mkdir -p $DIR/$tdir + $LFS mkdir -i1 -c2 $DIR/$tdir/striped_dir + replay_barrier mds1 + rm -rf $DIR/$tdir/striped_dir + fail mds1 + + $CHECKSTAT -t dir $DIR/$tdir/striped_dir && + error "striped dir still exists" + return 0 +} +run_test 111a "DNE: unlink striped dir, fail MDT1" + +test_111b() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ 
$FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + mkdir -p $DIR/$tdir + $LFS mkdir -i1 -c2 $DIR/$tdir/striped_dir + replay_barrier mds2 + rm -rf $DIR/$tdir/striped_dir + umount $MOUNT + fail mds2 + zconf_mount $(hostname) $MOUNT + client_up || return 1 + + $CHECKSTAT -t dir $DIR/$tdir/striped_dir && + error "striped dir still exists" + return 0 +} +run_test 111b "DNE: unlink striped dir, fail MDT2" + +test_111c() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + mkdir -p $DIR/$tdir + $LFS mkdir -i1 -c2 $DIR/$tdir/striped_dir + replay_barrier mds1 + rm -rf $DIR/$tdir/striped_dir + umount $MOUNT + replay_barrier mds2 + fail mds1,mds2 + zconf_mount $(hostname) $MOUNT + client_up || return 1 + $CHECKSTAT -t dir $DIR/$tdir/striped_dir && + error "striped dir still exists" + return 0 +} +run_test 111c "DNE: unlink striped dir, uncommit on MDT1, fail client/MDT1/MDT2" + +test_111d() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + mkdir -p $DIR/$tdir + $LFS mkdir -i1 -c2 $DIR/$tdir/striped_dir + replay_barrier mds2 + rm -rf $DIR/$tdir/striped_dir + umount $MOUNT + replay_barrier mds1 + fail mds1,mds2 + zconf_mount $(hostname) $MOUNT + client_up || return 1 + $CHECKSTAT -t dir $DIR/$tdir/striped_dir && + error "striped dir still exists" + + return 0 +} +run_test 111d "DNE: unlink striped dir, uncommit on MDT2, fail client/MDT1/MDT2" + +test_111e() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs 
to be on diff hosts for HARD fail mode" && + return 0 + + mkdir -p $DIR/$tdir + $LFS mkdir -i1 -c2 $DIR/$tdir/striped_dir + replay_barrier mds2 + rm -rf $DIR/$tdir/striped_dir + replay_barrier mds1 + fail mds1,mds2 + $CHECKSTAT -t dir $DIR/$tdir/striped_dir && + error "striped dir still exists" + return 0 +} +run_test 111e "DNE: unlink striped dir, uncommit on MDT2, fail MDT1/MDT2" + +test_111f() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + mkdir -p $DIR/$tdir + $LFS mkdir -i1 -c2 $DIR/$tdir/striped_dir + replay_barrier mds1 + rm -rf $DIR/$tdir/striped_dir + replay_barrier mds2 + fail mds1,mds2 + $CHECKSTAT -t dir $DIR/$tdir/striped_dir && + error "striped dir still exists" + return 0 +} +run_test 111f "DNE: unlink striped dir, uncommit on MDT1, fail MDT1/MDT2" + +test_111g() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + mkdir -p $DIR/$tdir + $LFS mkdir -i1 -c2 $DIR/$tdir/striped_dir + replay_barrier mds1 + replay_barrier mds2 + rm -rf $DIR/$tdir/striped_dir + fail mds1,mds2 + $CHECKSTAT -t dir $DIR/$tdir/striped_dir && + error "striped dir still exists" + return 0 +} +run_test 111g "DNE: unlink striped dir, fail MDT1/MDT2" + +test_112_rename_prepare() { + mkdir -p $DIR/$tdir/src_dir + $LFS mkdir -i 1 $DIR/$tdir/src_dir/src_child || + error "create remote source failed" + + touch $DIR/$tdir/src_dir/src_child/a + + $LFS mkdir -i 2 $DIR/$tdir/tgt_dir || + error "create remote target dir failed" + + $LFS mkdir -i 3 $DIR/$tdir/tgt_dir/tgt_child || + error "create remote target child failed" +} + +test_112_check() { + find $DIR/$tdir/ + $CHECKSTAT -t dir $DIR/$tdir/src_dir/src_child && + error "src_child still 
exists after rename" + + $CHECKSTAT -t file $DIR/$tdir/tgt_dir/tgt_child/a || + error "missing file(a) after rename" +} + +test_112a() { + [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + test_112_rename_prepare + replay_barrier mds1 + + mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child || + error "rename dir cross MDT failed!" + fail mds1 + + test_112_check + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 112a "DNE: cross MDT rename, fail MDT1" + +test_112b() { + [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + test_112_rename_prepare + replay_barrier mds2 + + mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child || + error "rename dir cross MDT failed!" + + fail mds2 + + test_112_check + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 112b "DNE: cross MDT rename, fail MDT2" + +test_112c() { + [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + test_112_rename_prepare + replay_barrier mds3 + + mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child || + error "rename dir cross MDT failed!" 
+ + fail mds3 + + test_112_check + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 112c "DNE: cross MDT rename, fail MDT3" + +test_112d() { + [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + test_112_rename_prepare + replay_barrier mds4 + + mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child || + error "rename dir cross MDT failed!" + + fail mds4 + + test_112_check + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 112d "DNE: cross MDT rename, fail MDT4" + +test_112e() { + [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + test_112_rename_prepare + replay_barrier mds1 + replay_barrier mds2 + + mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child || + error "rename dir cross MDT failed!" + + fail mds1,mds2 + + test_112_check + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 112e "DNE: cross MDT rename, fail MDT1 and MDT2" + +test_112f() { + [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + test_112_rename_prepare + replay_barrier mds1 + replay_barrier mds3 + + mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child || + error "rename dir cross MDT failed!" 
+ + fail mds1,mds3 + + test_112_check + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 112f "DNE: cross MDT rename, fail MDT1 and MDT3" + +test_112g() { + [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + test_112_rename_prepare + replay_barrier mds1 + replay_barrier mds4 + + mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child || + error "rename dir cross MDT failed!" + + fail mds1,mds4 + + test_112_check + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 112g "DNE: cross MDT rename, fail MDT1 and MDT4" + +test_112h() { + [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + test_112_rename_prepare + replay_barrier mds2 + replay_barrier mds3 + + mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child || + error "rename dir cross MDT failed!" + + fail mds2,mds3 + + test_112_check + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 112h "DNE: cross MDT rename, fail MDT2 and MDT3" + +test_112i() { + [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + test_112_rename_prepare + replay_barrier mds2 + replay_barrier mds4 + + mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child || + error "rename dir cross MDT failed!" 
+ + fail mds2,mds4 + + test_112_check + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 112i "DNE: cross MDT rename, fail MDT2 and MDT4" + +test_112j() { + [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + test_112_rename_prepare + replay_barrier mds3 + replay_barrier mds4 + + mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child || + error "rename dir cross MDT failed!" + + fail mds3,mds4 + + test_112_check + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 112j "DNE: cross MDT rename, fail MDT3 and MDT4" + +test_112k() { + [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + test_112_rename_prepare + replay_barrier mds1 + replay_barrier mds2 + replay_barrier mds3 + + mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child || + error "rename dir cross MDT failed!" + + fail mds1,mds2,mds3 + + test_112_check + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 112k "DNE: cross MDT rename, fail MDT1,MDT2,MDT3" + +test_112l() { + [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + test_112_rename_prepare + replay_barrier mds1 + replay_barrier mds2 + replay_barrier mds4 + + mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child || + error "rename dir cross MDT failed!" 
+ + fail mds1,mds2,mds4 + + test_112_check + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 112l "DNE: cross MDT rename, fail MDT1,MDT2,MDT4" + +test_112m() { + [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + test_112_rename_prepare + replay_barrier mds1 + replay_barrier mds3 + replay_barrier mds4 + + mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child || + error "rename dir cross MDT failed!" + + fail mds1,mds3,mds4 + + test_112_check + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 112m "DNE: cross MDT rename, fail MDT1,MDT3,MDT4" + +test_112n() { + [ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + test_112_rename_prepare + replay_barrier mds2 + replay_barrier mds3 + replay_barrier mds4 + + mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child || + error "rename dir cross MDT failed!" 
+ + fail mds2,mds3,mds4 + + test_112_check + rm -rf $DIR/$tdir || error "rmdir failed" +} +run_test 112n "DNE: cross MDT rename, fail MDT2,MDT3,MDT4" + +test_115() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + local fail_index=0 + local index + local i + local j + + mkdir -p $DIR/$tdir + for ((j=0;j<$((MDSCOUNT));j++)); do + fail_index=$((fail_index+1)) + index=$((fail_index % MDSCOUNT)) + replay_barrier mds$((index + 1)) + for ((i=0;i<5;i++)); do + test_mkdir -i$index -c$MDSCOUNT $DIR/$tdir/test_$i || + error "create striped dir $DIR/$tdir/test_$i" + done + + fail mds$((index + 1)) + for ((i=0;i<5;i++)); do + checkstat -t dir $DIR/$tdir/test_$i || + error "$DIR/$tdir/test_$i does not exist!" + done + rm -rf $DIR/$tdir/test_* || + error "rmdir fails" + done +} +run_test 115 "failover for create/unlink striped directory" + complete $SECONDS check_and_cleanup_lustre exit_status diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 4d53d89..cb24847 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -58,7 +58,7 @@ init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/${NAME}.sh} init_logging -[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 24D 27m 64b 68 71 77f 78 115 124b 230d" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 24D 27m 64b 68 71 77f 78 115 124b" if [ $(facet_fstype $SINGLEMDS) = "zfs" ]; then # bug number for skipped test: LU-4536 LU-1957 LU-2805 -- 1.8.3.1