1. Add update migrate test case in conf-sanity.sh 32c.
2. Add replay-dual.sh test 26 to exercise MDS failover during tar and dbench.
Signed-off-by: wang di <di.wang@intel.com>
Change-Id: I1431bfe8d076a16802d9bba7ca3a7b9d47745f5c
Reviewed-on: http://review.whamcloud.com/15163
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Jian Yu <jian.yu@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
local fstype=$(facet_fstype $SINGLEMDS)
local mdt_dev=$tmp/mdt
local ost_dev=$tmp/ost
+ local dir
trap 'trap - RETURN; t32_test_cleanup' RETURN
$LFS setdirstripe -D -c2 $tmp/mnt/lustre/remote_dir
+ $r $LCTL set_param -n \
+ mdt.${fsname}*.enable_remote_dir=1 2>/dev/null
+
pushd $tmp/mnt/lustre
tar -cf - . --exclude=./remote_dir |
tar -xvf - -C remote_dir 1>/dev/null || {
echo "list verification skipped"
fi
+ if [ $(lustre_version_code mds1) -ge $(version_code 2.7.50) -a \
+ $dne_upgrade != "no" ]; then
+ $r $LCTL set_param -n \
+ mdt.${fsname}*.enable_remote_dir=1 2>/dev/null
+
+ echo "test migration"
+ pushd $tmp/mnt/lustre
+ # migrate the files/directories to the remote MDT, then
+ # move it back
+ for dir in $(find ! -name .lustre ! -name . -type d); do
+ mdt_index=$($LFS getdirstripe -i $dir)
+ stripe_cnt=$($LFS getdirstripe -c $dir)
+ if [ $mdt_index = 0 -a $stripe_cnt -le 1 ]; then
+ $LFS mv -M 1 $dir || {
+ popd
+ error_noexit "migrate MDT1 failed"
+ return 1
+ }
+ fi
+ done
+
+ for dir in $(find ! -name . ! -name .lustre -type d); do
+ mdt_index=$($LFS getdirstripe -i $dir)
+ stripe_cnt=$($LFS getdirstripe -c $dir)
+ if [ $mdt_index = 1 -a $stripe_cnt -le 1 ]; then
+ $LFS mv -M 0 $dir || {
+ popd
+ error_noexit "migrate MDT0 failed"
+ return 1
+ }
+ fi
+ done
+ popd
+ fi
+
#
# When adding new data verification tests, please check for
# the presence of the required reference files first, like
# Do not support 1_8 and 2_1 direct upgrade to DNE2 anymore
echo $tarball | grep "1_8" && continue
echo $tarball | grep "2_1" && continue
+ load_modules
dne_upgrade=yes t32_test $tarball writeconf || rc=$?
done
return $rc
}
run_test 25 "replay|resend"
+# Kill the background tar and dbench loops started by test_26.
+# Reads the globals tar_26_pid and dbench_26_pid, which test_26 assigns.
+cleanup_26() {
+	# reset the EXIT trap; test_26 installs this function as its handler
+	trap 0
+	kill -9 $tar_26_pid
+	# test_26 stores the dbench loop PID in dbench_26_pid
+	kill -9 $dbench_26_pid
+}
+
+# Run a tar loop and a dbench loop in the background while failing over
+# mds1..mds$MDSCOUNT in turn until $duration seconds have elapsed.
+# The loop PIDs are stored in the globals tar_26_pid and dbench_26_pid,
+# which cleanup_26 reads in order to kill the loops.
+test_26() {
+	local clients=${CLIENTS:-$HOSTNAME}
+
+	zconf_mount_clients $clients $MOUNT
+
+	local duration=600
+	[ "$SLOW" = "no" ] && duration=200
+	# set duration to 900 because it takes some time to boot node
+	[ "$FAILURE_MODE" = HARD ] && duration=900
+
+	local elapsed
+	local start_ts=$(date +%s)
+	local rc=0
+
+	trap cleanup_26 EXIT
+	(
+		local tar_dir=$DIR/$tdir/run_tar
+		while true; do
+			test_mkdir -p -c$MDSCOUNT $tar_dir || break
+			# a default stripe is only set with several MDTs;
+			# having a single MDT must not be reported as an error
+			if [ $MDSCOUNT -ge 2 ]; then
+				$LFS setdirstripe -D -c$MDSCOUNT $tar_dir ||
+					error "set default dirstripe failed"
+			fi
+			cd $tar_dir || break
+			tar cf - /etc | tar xf - || error "tar failed"
+			cd $DIR/$tdir || break
+			rm -rf $tar_dir || break
+		done
+	)&
+	tar_26_pid=$!
+	echo "Started tar $tar_26_pid"
+
+	(
+		local dbench_dir=$DIR2/$tdir/run_dbench
+		while true; do
+			test_mkdir -p -c$MDSCOUNT $dbench_dir || break
+			# same rule as for the tar loop above
+			if [ $MDSCOUNT -ge 2 ]; then
+				$LFS setdirstripe -D -c$MDSCOUNT $dbench_dir ||
+					error "set default dirstripe failed"
+			fi
+			cd $dbench_dir || break
+			# discard both stdout and stderr of dbench
+			rundbench 1 -D $dbench_dir -t 100 > /dev/null 2>&1 ||
+				break
+			cd $DIR/$tdir || break
+			rm -rf $dbench_dir || break
+		done
+	)&
+	dbench_26_pid=$!
+	echo "Started dbench $dbench_26_pid"
+
+	elapsed=$(($(date +%s) - start_ts))
+	local num_failovers=0
+	local fail_index=1
+	while [ $elapsed -lt $duration ]; do
+		# kill -0 probes exactly this PID; grepping ps output would
+		# also match unrelated lines that contain the same digits
+		kill -0 $tar_26_pid 2>/dev/null ||
+			error "tar $tar_26_pid stopped"
+		kill -0 $dbench_26_pid 2>/dev/null ||
+			error "dbench $dbench_26_pid stopped"
+		sleep 2
+		replay_barrier mds$fail_index
+		sleep 2 # give clients a time to do operations
+		# Increment the number of failovers
+		num_failovers=$((num_failovers+1))
+		log "$TESTNAME fail mds$fail_index $num_failovers times"
+		fail mds$fail_index
+		elapsed=$(($(date +%s) - start_ts))
+		# advance to the next MDS, wrapping back to mds1
+		if [ $fail_index -ge $MDSCOUNT ]; then
+			fail_index=1
+		else
+			fail_index=$((fail_index+1))
+		fi
+	done
+	# stop the client loads
+	kill -0 $tar_26_pid || error "tar $tar_26_pid stopped"
+	kill -0 $dbench_26_pid || error "dbench $dbench_26_pid stopped"
+	killall -9 dbench
+	cleanup_26
+	true
+}
+run_test 26 "dbench and tar with mds failover"
+
complete $SECONDS
SLEEP=$((`date +%s` - $NOW))
[ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
}
run_test 70c "tar ${MDSCOUNT}mdts recovery"
+# Kill the background mkdir/rmdir loop whose PID test_70d stored in the
+# global mkdir_70d_pid.
+cleanup_70d() {
+	# put the EXIT trap back to its default disposition
+	trap - EXIT
+	kill -s KILL $mkdir_70d_pid
+}
+
+test_70d () {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local clients=${CLIENTS:-$HOSTNAME}
+ local rc=0
+
+ zconf_mount_clients $clients $MOUNT
+
+ local duration=300
+ [ "$SLOW" = "no" ] && duration=180
+ # set duration to 900 because it takes some time to boot node
+ [ "$FAILURE_MODE" = HARD ] && duration=900
+
+ mkdir -p $DIR/$tdir
+
+ local elapsed
+ local start_ts=$(date +%s)
+
+ trap cleanup_70d EXIT
+ (
+ while true; do
+ $LFS mkdir -i0 -c2 $DIR/$tdir/test || {
+ echo "mkdir fails"
+ break
+ }
+ $LFS mkdir -i1 -c2 $DIR/$tdir/test1 || {
+ echo "mkdir fails"
+ break
+ }
+
+ touch $DIR/$tdir/test/a || {
+ echo "touch fails"
+ break;
+ }
+ mkdir $DIR/$tdir/test/b || {
+ echo "mkdir fails"
+ break;
+ }
+ rm -rf $DIR/$tdir/test || {
+ echo "rmdir fails"
+ break
+ }
+
+ touch $DIR/$tdir/test1/a || {
+ echo "touch fails"
+ break;
+ }
+ mkdir $DIR/$tdir/test1/b || {
+ echo "mkdir fails"
+ break;
+ }
+
+ rm -rf $DIR/$tdir/test1 || {
+ echo "rmdir fails"
+ break
+ }
+ done
+ )&
+ mkdir_70d_pid=$!
+ echo "Started $mkdir_70d_pid"
+
+ random_fail_mdt $MDSCOUNT $duration $mkdir_70d_pid
+ kill -0 $mkdir_70d_pid || error "mkdir/rmdir $mkdir_70d_pid stopped"
+
+ cleanup_70d
+ true
+}
+run_test 70d "mkdir/rmdir striped dir ${MDSCOUNT}mdts recovery"
+
+# Kill the background rename loop whose PID test_70e stored in the global
+# rename_70e_pid.
+cleanup_70e() {
+	# put the EXIT trap back to its default disposition
+	trap - EXIT
+	kill -s KILL $rename_70e_pid
+}
+
+# Rename a file back and forth between two directories in a background
+# loop while random_fail_mdt is run with an MDT count of 2, then check
+# that the loop is still alive.
+test_70e () {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	local clients=${CLIENTS:-$HOSTNAME}
+	local rc=0
+
+	# NOTE(review): this overwrites the local lnet debug mask and never
+	# restores it; looks like leftover debugging - confirm it is wanted
+	echo ha > /proc/sys/lnet/debug
+	zconf_mount_clients $clients $MOUNT
+
+	local duration=300
+	[ "$SLOW" = "no" ] && duration=180
+	# set duration to 900 because it takes some time to boot node
+	[ "$FAILURE_MODE" = HARD ] && duration=900
+
+	mkdir -p $DIR/$tdir
+	# NOTE(review): both directories are requested on MDT index 0 although
+	# the test is described as cross-MDT - confirm this is intended
+	$LFS mkdir -i0 $DIR/$tdir/test_0
+	$LFS mkdir -i0 $DIR/$tdir/test_1
+	touch $DIR/$tdir/test_0/a
+	touch $DIR/$tdir/test_1/b
+	trap cleanup_70e EXIT
+	(
+		while true; do
+			mrename $DIR/$tdir/test_0/a $DIR/$tdir/test_1/b > \
+				/dev/null || {
+				echo "a->b fails"
+				break
+			}
+
+			# the rename source must be gone afterwards
+			checkstat $DIR/$tdir/test_0/a && {
+				echo "a still exists"
+				break
+			}
+
+			# the rename target must be present afterwards
+			checkstat $DIR/$tdir/test_1/b || {
+				echo "b doesn't exist"
+				break
+			}
+
+			# recreate a so that the rename back replaces a file
+			touch $DIR/$tdir/test_0/a || {
+				echo "touch a fails"
+				break
+			}
+
+			mrename $DIR/$tdir/test_1/b $DIR/$tdir/test_0/a > \
+				/dev/null || {
+				echo "b->a fails"
+				break
+			}
+		done
+	)&
+	rename_70e_pid=$!
+	echo "Started $rename_70e_pid"
+
+	random_fail_mdt 2 $duration $rename_70e_pid
+	# the loop only ends on a failed step, so it must still exist
+	kill -0 $rename_70e_pid || error "rename $rename_70e_pid stopped"
+
+	cleanup_70e
+	true
+}
+run_test 70e "rename cross-MDT with random fails"
+
+
test_73a() {
multiop_bg_pause $DIR/$tfile O_tSc ||
error "multiop_bg_pause $DIR/$tfile failed"