Whamcloud - gitweb
LU-3534 tests: a few tests cases for async update. 63/15163/14
author: wang di <di.wang@intel.com>
Fri, 31 Jul 2015 11:55:19 +0000 (04:55 -0700)
committer: Oleg Drokin <oleg.drokin@intel.com>
Fri, 28 Aug 2015 02:07:07 +0000 (02:07 +0000)
1. Add an update/migrate test case to conf-sanity.sh test 32c.
2. Add replay-dual.sh test 26 to fail over MDSes during tar and dbench.

Signed-off-by: wang di <di.wang@intel.com>
Change-Id: I1431bfe8d076a16802d9bba7ca3a7b9d47745f5c
Reviewed-on: http://review.whamcloud.com/15163
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Jian Yu <jian.yu@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/tests/conf-sanity.sh
lustre/tests/replay-dual.sh
lustre/tests/replay-single.sh

index b0a19ef..23cd0da 100644 (file)
@@ -1646,6 +1646,7 @@ t32_test() {
        local fstype=$(facet_fstype $SINGLEMDS)
        local mdt_dev=$tmp/mdt
        local ost_dev=$tmp/ost
        local fstype=$(facet_fstype $SINGLEMDS)
        local mdt_dev=$tmp/mdt
        local ost_dev=$tmp/ost
+       local dir
 
        trap 'trap - RETURN; t32_test_cleanup' RETURN
 
 
        trap 'trap - RETURN; t32_test_cleanup' RETURN
 
@@ -1907,6 +1908,9 @@ t32_test() {
 
                        $LFS setdirstripe -D -c2 $tmp/mnt/lustre/remote_dir
 
 
                        $LFS setdirstripe -D -c2 $tmp/mnt/lustre/remote_dir
 
+                       $r $LCTL set_param -n   \
+                               mdt.${fsname}*.enable_remote_dir=1 2>/dev/null
+
                        pushd $tmp/mnt/lustre
                        tar -cf - . --exclude=./remote_dir |
                                tar -xvf - -C remote_dir 1>/dev/null || {
                        pushd $tmp/mnt/lustre
                        tar -cf - . --exclude=./remote_dir |
                                tar -xvf - -C remote_dir 1>/dev/null || {
@@ -1992,6 +1996,41 @@ t32_test() {
                        echo "list verification skipped"
                fi
 
                        echo "list verification skipped"
                fi
 
+               if [ $(lustre_version_code mds1) -ge $(version_code 2.7.50) -a \
+                    $dne_upgrade != "no" ]; then
+                       $r $LCTL set_param -n   \
+                               mdt.${fsname}*.enable_remote_dir=1 2>/dev/null
+
+                       echo "test migration"
+                       pushd $tmp/mnt/lustre
+                       # migrate the files/directories to the remote MDT, then
+                       # move it back
+                       for dir in $(find ! -name .lustre ! -name . -type d); do
+                               mdt_index=$($LFS getdirstripe -i $dir)
+                               stripe_cnt=$($LFS getdirstripe -c $dir)
+                               if [ $mdt_index = 0 -a $stripe_cnt -le 1 ]; then
+                                       $LFS mv -M 1 $dir || {
+                                       popd
+                                       error_noexit "migrate MDT1 failed"
+                                       return 1
+                               }
+                               fi
+                       done
+
+                       for dir in $(find ! -name . ! -name .lustre -type d); do
+                               mdt_index=$($LFS getdirstripe -i $dir)
+                               stripe_cnt=$($LFS getdirstripe -c $dir)
+                               if [ $mdt_index = 1 -a $stripe_cnt -le 1 ]; then
+                                       $LFS mv -M 0 $dir || {
+                                       popd
+                                       error_noexit "migrate MDT0 failed"
+                                       return 1
+                               }
+                               fi
+                       done
+                       popd
+               fi
+
                #
                # When adding new data verification tests, please check for
                # the presence of the required reference files first, like
                #
                # When adding new data verification tests, please check for
                # the presence of the required reference files first, like
@@ -2102,6 +2141,7 @@ test_32c() {
                # Do not support 1_8 and 2_1 direct upgrade to DNE2 anymore */
                echo $tarball | grep "1_8" && continue
                echo $tarball | grep "2_1" && continue
                # Do not support 1_8 and 2_1 direct upgrade to DNE2 anymore */
                echo $tarball | grep "1_8" && continue
                echo $tarball | grep "2_1" && continue
+               load_modules
                dne_upgrade=yes t32_test $tarball writeconf || rc=$?
        done
        return $rc
                dne_upgrade=yes t32_test $tarball writeconf || rc=$?
        done
        return $rc
index fa775ab..e3195ea 100755 (executable)
@@ -920,6 +920,91 @@ test_25() {
 }
 run_test 25 "replay|resend"
 
 }
 run_test 25 "replay|resend"
 
+cleanup_26() {
+       trap 0
+       kill -9 $tar_26_pid
+       kill -9 $dbench_26_jpid
+}
+
+test_26() {
+       local clients=${CLIENTS:-$HOSTNAME}
+
+       zconf_mount_clients $clients $MOUNT
+
+       local duration=600
+       [ "$SLOW" = "no" ] && duration=200
+       # set duration to 900 because it takes some time to boot node
+       [ "$FAILURE_MODE" = HARD ] && duration=900
+
+       local elapsed
+       local start_ts=$(date +%s)
+       local rc=0
+
+       trap cleanup_26 EXIT
+       (
+               local tar_dir=$DIR/$tdir/run_tar
+               while true; do
+                       test_mkdir -p -c$MDSCOUNT $tar_dir || break
+                       [ $MDSCOUNT -ge 2 ] &&
+                       $LFS setdirstripe -D -c$MDSCOUNT $tar_dir ||
+                               error "set default dirstripe failed"
+                       cd $tar_dir || break
+                       tar cf - /etc | tar xf - || error "tar failed"
+                       cd $DIR/$tdir || break
+                       rm -rf $tar_dir || break
+               done
+       )&
+       tar_26_pid=$!
+       echo "Started tar $tar_26_pid"
+
+       (
+               local dbench_dir=$DIR2/$tdir/run_dbench
+               while true; do
+                       test_mkdir -p -c$MDSCOUNT $dbench_dir || break
+                       [ $MDSCOUNT -ge 2 ] &&
+                       $LFS setdirstripe -D -c$MDSCOUNT $dbench_dir ||
+                               error "set default dirstripe failed"
+                       cd $dbench_dir || break
+                       rundbench 1 -D $dbench_dir -t 100 > /dev/null 2&>1 ||
+                                                                       break
+                       cd $DIR/$tdir || break
+                       rm -rf $dbench_dir || break
+               done
+       )&
+       dbench_26_pid=$!
+       echo "Started dbench $dbench_26_pid"
+
+       elapsed=$(($(date +%s) - start_ts))
+       local num_failovers=0
+       local fail_index=1
+       while [ $elapsed -lt $duration ]; do
+               ps auxwww | grep -v grep | grep -q $tar_26_pid ||
+                                       error "tar $tar_26_pid stopped"
+               ps auxwww | grep -v grep | grep -q $dbench_26_pid ||
+                                       error "dbench $dbench_26_pid stopped"
+               sleep 2
+               replay_barrier mds$fail_index
+               sleep 2 # give clients a time to do operations
+               # Increment the number of failovers
+               num_failovers=$((num_failovers+1))
+               log "$TESTNAME fail mds$fail_index $num_failovers times"
+               fail mds$fail_index
+               elapsed=$(($(date +%s) - start_ts))
+               if [ $fail_index -ge $MDSCOUNT ]; then
+                       fail_index=1
+               else
+                       fail_index=$((fail_index+1))
+               fi
+       done
+       # stop the client loads
+       kill -0 $tar_26_pid || error "tar $tar_26_pid stopped"
+       kill -0 $dbench_26_pid || error "dbench $dbench_26_pid stopped"
+       killall -9 dbench
+       cleanup_26
+       true
+}
+run_test 26 "dbench and tar with mds failover"
+
 complete $SECONDS
 SLEEP=$((`date +%s` - $NOW))
 [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
 complete $SECONDS
 SLEEP=$((`date +%s` - $NOW))
 [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
index d5e7404..e4de09b 100755 (executable)
@@ -2167,6 +2167,145 @@ test_70c () {
 }
 run_test 70c "tar ${MDSCOUNT}mdts recovery"
 
 }
 run_test 70c "tar ${MDSCOUNT}mdts recovery"
 
+cleanup_70d() {
+       trap 0
+       kill -9 $mkdir_70d_pid
+}
+
+test_70d () {
+       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+       local clients=${CLIENTS:-$HOSTNAME}
+       local rc=0
+
+       zconf_mount_clients $clients $MOUNT
+
+       local duration=300
+       [ "$SLOW" = "no" ] && duration=180
+       # set duration to 900 because it takes some time to boot node
+       [ "$FAILURE_MODE" = HARD ] && duration=900
+
+       mkdir -p $DIR/$tdir
+
+       local elapsed
+       local start_ts=$(date +%s)
+
+       trap cleanup_70d EXIT
+       (
+               while true; do
+                       $LFS mkdir -i0 -c2 $DIR/$tdir/test || {
+                               echo "mkdir fails"
+                               break
+                       }
+                       $LFS mkdir -i1 -c2 $DIR/$tdir/test1 || {
+                               echo "mkdir fails"
+                               break
+                       }
+
+                       touch $DIR/$tdir/test/a || {
+                               echo "touch fails"
+                               break;
+                       }
+                       mkdir $DIR/$tdir/test/b || {
+                               echo "mkdir fails"
+                               break;
+                       }
+                       rm -rf $DIR/$tdir/test || {
+                               echo "rmdir fails"
+                               break
+                       }
+
+                       touch $DIR/$tdir/test1/a || {
+                               echo "touch fails"
+                               break;
+                       }
+                       mkdir $DIR/$tdir/test1/b || {
+                               echo "mkdir fails"
+                               break;
+                       }
+
+                       rm -rf $DIR/$tdir/test1 || {
+                               echo "rmdir fails"
+                               break
+                       }
+               done
+       )&
+       mkdir_70d_pid=$!
+       echo "Started  $mkdir_70d_pid"
+
+       random_fail_mdt $MDSCOUNT $duration $mkdir_70d_pid
+       kill -0 $mkdir_70d_pid || error "mkdir/rmdir $mkdir_70d_pid stopped"
+
+       cleanup_70d
+       true
+}
+run_test 70d "mkdir/rmdir striped dir ${MDSCOUNT}mdts recovery"
+
+cleanup_70e() {
+       trap 0
+       kill -9 $rename_70e_pid
+}
+
+test_70e () {
+       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+       local clients=${CLIENTS:-$HOSTNAME}
+       local rc=0
+
+       echo ha > /proc/sys/lnet/debug
+       zconf_mount_clients $clients $MOUNT
+
+       local duration=300
+       [ "$SLOW" = "no" ] && duration=180
+       # set duration to 900 because it takes some time to boot node
+       [ "$FAILURE_MODE" = HARD ] && duration=900
+
+       mkdir -p $DIR/$tdir
+       $LFS mkdir -i0 $DIR/$tdir/test_0
+       $LFS mkdir -i0 $DIR/$tdir/test_1
+       touch $DIR/$tdir/test_0/a
+       touch $DIR/$tdir/test_1/b
+       trap cleanup_70e EXIT
+       (
+               while true; do
+                       mrename $DIR/$tdir/test_0/a $DIR/$tdir/test_1/b > \
+                                               /dev/null || {
+                               echo "a->b fails" 
+                               break;
+                       }
+
+                       checkstat $DIR/$tdir/test_0/a && {
+                               echo "a still exists"
+                               break
+                       }
+
+                       checkstat $DIR/$tdir/test_1/b || {
+                               echo "b still  exists"
+                               break
+                       }
+
+                       touch $DIR/$tdir/test_0/a || {
+                               echo "touch a fails"
+                               break
+                       }
+
+                       mrename $DIR/$tdir/test_1/b $DIR/$tdir/test_0/a > \
+                                               /dev/null || {
+                               echo "a->a fails"
+                               break;
+                       }
+               done
+       )&
+       rename_70e_pid=$!
+       echo "Started  $rename_70e_pid"
+
+       random_fail_mdt 2 $duration $rename_70e_pid
+       kill -0 $rename_70e_pid || error "rename $rename_70e_pid stopped"
+
+       cleanup_70e
+       true
+}
+run_test 70e "rename cross-MDT with random fails"
+
+
 test_73a() {
        multiop_bg_pause $DIR/$tfile O_tSc ||
                error "multiop_bg_pause $DIR/$tfile failed"
 test_73a() {
        multiop_bg_pause $DIR/$tfile O_tSc ||
                error "multiop_bg_pause $DIR/$tfile failed"