remote_mds_nodsh && log "SKIP: remote MDS with nodsh" && exit 0
-[ -n "$CLIENTS" ] || { skip "$0: Need two or more clients" && exit 0; }
+[ -n "$CLIENTS" ] || { skip_env "$0: Need two or more clients" && exit 0; }
[ $CLIENTCOUNT -ge 2 ] || \
- { skip "$0: Need two or more clients, have $CLIENTCOUNT" && exit 0; }
+ { skip_env "$0: Need two or more remote clients, have $CLIENTCOUNT" && exit 0; }
#
[ "$SLOW" = "no" ] && EXCEPT_SLOW=""
}
check_vbr || \
- { skip "$0: no version_recovery" && exit 0; }
+ { skip_env "$0: no version_recovery" && exit 0; }
FAKE_NUM_MAX=${FAKE_NUM_MAX:-1000}
[ "$SLOW" = "no" ] && FAKE_NUM_MAX=100
-do_and_time () {
- local cmd=$1
-
- local start_ts=`date +%s`
-
- $cmd
-
- local current_ts=`date +%s`
- ELAPSED=`expr $current_ts - $start_ts`
- echo "===== START $start_ts CURRENT $current_ts"
-}
-
delete_fake_exports () {
NUM=$(do_facet mds "lctl get_param -n mds.${mds_svc}.stale_exports|wc -l")
}
test_1b() {
+ delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
+
local FAKE_NUM
local NUM
NUM=$(do_facet mds "lctl get_param -n mds.${mds_svc}.stale_exports|wc -l")
[ $NUM -lt $FAKE_NUM ] && error "fake exports $NUM -ne $FAKE_NUM"
echo "===== STALE EXPORTS: FAKE_NUM=$FAKE_NUM NUM=$NUM"
- do_and_time "zconf_mount_clients $CLIENTS $DIR"
- echo "==== $TESTNAME ===== CONNECTION TIME $ELAPSED: FAKE_NUM=$FAKE_NUM CLIENTCOUNT=$CLIENTCOUNT"
+ local elapsed=$(do_and_time "zconf_mount_clients $CLIENTS $DIR")
+ echo "==== $TESTNAME ===== CONNECTION TIME $elapsed: FAKE_NUM=$FAKE_NUM CLIENTCOUNT=$CLIENTCOUNT"
# do_facet mds "lctl set_param mds.${mds_svc}.flush_stale_exports=1"
delete_fake_exports
}
test_1c() {
+ delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
+
zconf_mount_clients $CLIENTS $DIR
# sanity mds fail (to exclude the recults on fresh formatted fs)
}
run_test 1c "VBR: recovery $CLIENTCOUNT clients with delayed exports"
-
test_1d() {
+ delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
+
local FAKE_NUM
local NUM
EX_NUM=$(do_facet mds "lctl get_param -n mds.${mds_svc}.stale_exports|grep -c EXPIRED")
[ "$EX_NUM" -eq "$NUM" ] || error "not all exports are expired $EX_NUM != $NUM"
- do_and_time "zconf_mount_clients $CLIENTS $DIR"
- echo "==== $TESTNAME===== CONNECTION TIME $ELAPSED: expired FAKE_NUM=$FAKE_NUM CLIENTCOUNT=$CLIENTCOUNT"
+ local elapsed=$(do_and_time "zconf_mount_clients $CLIENTS $DIR")
+ echo "==== $TESTNAME===== CONNECTION TIME $elapsed: expired FAKE_NUM=$FAKE_NUM CLIENTCOUNT=$CLIENTCOUNT"
do_facet mds "lctl set_param mds.${mds_svc}.stale_export_age=$OLD_AGE"
done
run_test 1d "VBR: expire exports, connect $CLIENTCOUNT clients"
# VBR scale tests end
+# test_3a: measure MDS recovery time while clients are creating files.
+#
+# For a growing number of clients (INCREMENT, 2*INCREMENT, ... up to
+# CLIENTCOUNT) an mdsrate --create job is started in the background through
+# mpi_run, the mds facet is failed over while the job is running, and the
+# "recovery_duration" line of the MDS recovery_status parameter is recorded.
+# Every (client count, duration) pair is echoed as it is measured and again
+# in a summary at the end.
+#
+# Required environment (checked with assert_env): CLIENTS MDSRATE MPIRUN
+# Optional environment:
+#   INCREMENT           client-count step         (default: CLIENTCOUNT / 2)
+#   MACHINEFILE         machine file to generate  (default: $TMP/<script>.machines)
+#   ITERS               failovers per client count (default: 3)
+#   NFILES              files to create per run    (default: 50000, capped by
+#                       the value reported by inodes_available)
+#   THREADS_PER_CLIENT  mdsrate tasks per client   (default: 3)
+# Exits with status 7 if recovery does not complete within TIMEOUT * 10.
+test_3a() {
+    assert_env CLIENTS MDSRATE MPIRUN
+
+    local -a nodes=(${CLIENTS//,/ })
+
+    # INCREMENT is the number of clients added at each step;
+    # by default, half of the clients.
+    local increment=${INCREMENT:-$(( CLIENTCOUNT / 2 ))}
+
+    # A zero or non-numeric step would make the scaling loop below spin
+    # forever, because num would never pass CLIENTCOUNT.
+    if ! [[ $increment =~ ^[0-9]+$ ]] || (( 10#$increment <= 0 )); then
+        error "INCREMENT must be a positive integer, got '$increment'"
+        exit 1
+    fi
+    # Normalize so that a value with leading zeros is not read as octal later.
+    increment=$((10#$increment))
+
+    local machinefile=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines}
+    local LOG=$TMP/${TESTSUITE}_$tfile
+
+    local var=mds_svc
+    local procfile="*.${!var}.recovery_status"
+    local iters=${ITERS:-3}
+    local nfiles=${NFILES:-50000}
+    local nthreads=${THREADS_PER_CLIENT:-3}
+
+    # Never try to create more files than inodes_available reports.
+    local IFree=$(inodes_available)
+    [ "$IFree" -gt "$nfiles" ] || nfiles=$IFree
+
+    local dir=$DIR/$tdir
+    mkdir -p "$dir"
+    chmod 0777 "$dir"
+
+    local pid
+    local list
+    local duration
+    local COMMAND
+    local i
+    local -a res
+
+    local num=$increment
+    # Client count that matches the machine file currently on disk; used for
+    # the final cleanup, because num itself ends up past CLIENTCOUNT.
+    local last_num=$num
+
+    while [ "$num" -le "$CLIENTCOUNT" ]; do
+        list=$(comma_list ${nodes[@]:0:$num})
+
+        generate_machine_file $list $machinefile ||
+            { error "can not generate machinefile"; exit 1; }
+        last_num=$num
+
+        for (( i = 1; i <= iters; i++ )); do
+            # Remove whatever the previous iteration left behind.
+            mdsrate_cleanup $num $machinefile $nfiles $dir 'f%%d' --ignore
+
+            # COMMAND is expanded unquoted on purpose so that it is split
+            # into separate arguments for mpi_run.
+            # NOTE(review): the embedded quotes and '%%' are presumably
+            # resolved inside mpi_run -- confirm against test-framework.
+            COMMAND="${MDSRATE} --create --nfiles $nfiles --dir $dir --filefmt 'f%%d'"
+            mpi_run -np $((num * nthreads)) -machinefile $machinefile ${COMMAND} | tee "${LOG}" &
+
+            # $! is the last process of the background pipeline (tee).
+            pid=$!
+            echo "pid=$pid"
+
+            # 2 threads 100000 creates 117 secs
+            sleep 20
+
+            log "$i : Starting failover on mds"
+            facet_failover mds
+            if ! wait_recovery_complete mds $((TIMEOUT * 10)); then
+                echo "mds recovery is not completed!"
+                kill -9 $pid
+                exit 7
+            fi
+
+            # procfile holds a '*' wildcard; quote it so it is not expanded
+            # against the local working directory.
+            duration=$(do_facet mds lctl get_param -n "$procfile" | grep recovery_duration)
+
+            # Results are stored as consecutive (clients, duration) pairs.
+            res+=( "$num" "$duration" )
+            echo "RECOVERY TIME: NFILES=$nfiles number of clients: $num $duration"
+            wait $pid
+
+        done
+        num=$((num + increment))
+    done
+
+    # Clean up with the client count of the last generated machine file
+    # rather than the overshot loop counter.
+    mdsrate_cleanup $last_num $machinefile $nfiles $dir 'f%%d' --ignore
+
+    # Summary: print every recorded (clients, duration) pair again.
+    local idx
+    for (( idx = 0; idx < ${#res[@]}; idx += 2 )); do
+        echo "RECOVERY TIME: NFILES=$nfiles number of clients: ${res[idx]} ${res[idx+1]}"
+    done
+}
+
+run_test 3a "recovery time, $CLIENTCOUNT clients"
+
equals_msg `basename $0`: test complete, cleaning up
check_and_cleanup_lustre
[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG || true