# Suite preamble: set defaults, load the test framework and configuration,
# bail out early when prerequisites are missing, then set up the filesystem.
# NOTE(review): this excerpt is non-contiguous; the number at the start of
# each line looks like a leftover original line number, and the lines in
# between are not visible here -- confirm against the full file.

# Exclusion list is taken from LARGE_SCALE_EXCEPT.
6 ALWAYS_EXCEPT="$LARGE_SCALE_EXCEPT"
# Environment-overridable defaults: PTLDEBUG falls back to -1, LUSTRE to the
# parent of the directory holding this script, CLEANUP to the empty string.
9 PTLDEBUG=${PTLDEBUG:--1}
10 LUSTRE=${LUSTRE:-`dirname $0`/..}
12 CLEANUP=${CLEANUP:-""}
# Source the shared test framework, then the config file. ':=' also assigns
# the default path to CONFIG when it is unset or empty.
13 . $LUSTRE/tests/test-framework.sh
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# When remote_mds_nodsh succeeds, log a SKIP message and exit with status 0.
19 remote_mds_nodsh && log "SKIP: remote MDS with nodsh" && exit 0
# Skip (exit 0) unless CLIENTS is non-empty and CLIENTCOUNT is at least 2.
21 [ -n "$CLIENTS" ] || { skip "$0: Need two or more clients" && exit 0; }
22 [ $CLIENTCOUNT -ge 2 ] || \
23 { skip "$0: Need two or more clients, have $CLIENTCOUNT" && exit 0; }
# With SLOW=no, EXCEPT_SLOW is cleared.
26 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
31 check_and_setup_lustre
# Remove leftovers under $DIR whose names start with 'd' or 'f' followed by
# a digit.
32 rm -rf $DIR/[df][0-9]*
# If DAEMONFILE is set, start the lctl debug daemon writing to it, passing
# DAEMONSIZE as the size argument.
34 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
# Body of the recovery-time test (registered below as test 3a): for a growing
# number of clients, create files with mdsrate, fail over the MDS while the
# creates run, and record the reported recovery duration.
# NOTE(review): the enclosing function header, the 'done'/'fi' terminators and
# the initialisation of 'num', 'i' and 'dir' are not visible in this excerpt.

# Presumably aborts when any of these variables is unset -- confirm in
# test-framework.sh.
37 assert_env CLIENTS MDSRATE MPIRUN
# Split the comma-separated CLIENTS list into an array of node names.
39 local -a nodes=(${CLIENTS//,/ })
41 # INCREMENT is a number of clients
42 # a half of clients by default
43 increment=${INCREMENT:-$(( CLIENTCOUNT / 2 ))}
# Machine file defaults to $TMP/<script name without .sh>.machines.
45 machinefile=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines}
46 local LOG=$TMP/${TESTSUITE}_$tfile
# 'var' holds the NAME of the variable "<SINGLEMDS>_svc"; ${!var} expands
# indirectly to that variable's value, which forms the middle component of
# the recovery_status parameter pattern.
48 local var=${SINGLEMDS}_svc
49 local procfile="*.${!var}.recovery_status"
# Tunables with defaults: 3 iterations, 50000 files, 3 threads per client.
50 local iters=${ITERS:-3}
51 local nfiles=${NFILES:-50000}
52 local nthreads=${THREADS_PER_CLIENT:-3}
# Cap nfiles at the value returned by inodes_available when that value is
# not greater than nfiles.
54 local IFree=$(inodes_available)
55 [ $IFree -gt $nfiles ] || nfiles=$IFree
# Outer loop: continue while the client count 'num' does not exceed
# CLIENTCOUNT.
67 while [ $num -le $CLIENTCOUNT ]; do
# Take the first $num nodes and pass them to comma_list.
68 list=$(comma_list ${nodes[@]:0:$num})
# Build the machine file for this client subset; on failure report an error
# and exit with status 1.
70 generate_machine_file $list $machinefile ||
71 { error "can not generate machinefile"; exit 1; }
# Inner loop: repeat the create/failover cycle $iters times.
73 for i in $(seq $iters); do
# Clean up before each run, using the same file-name format as the create.
74 mdsrate_cleanup $num $machinefile $nfiles $dir 'f%%d' --ignore
# Launch the mdsrate create workload in the background with
# num * nthreads processes; its output is also written to $LOG via tee.
76 COMMAND="${MDSRATE} --create --nfiles $nfiles --dir $dir --filefmt 'f%%d'"
77 mpi_run -np $((num * nthreads)) -machinefile $machinefile ${COMMAND} | tee ${LOG} &
82 # 2 threads 100000 creates 117 secs
# Fail over the MDS facet, then wait for recovery to complete with a limit
# of TIMEOUT * 10; a message is printed if the wait does not succeed.
85 log "$i : Starting failover on $SINGLEMDS"
86 facet_failover $SINGLEMDS
87 if ! wait_recovery_complete $SINGLEMDS $((TIMEOUT * 10)); then
88 echo "$SINGLEMDS recovery is not completed!"
# Read the recovery status on the MDS facet and keep only the line(s)
# containing "recovery_duration".
93 duration=$(do_facet $SINGLEMDS lctl get_param -n $procfile | grep recovery_duration)
# Append the result as two consecutive elements of 'res': the client count,
# then the duration text.
95 res=( "${res[@]}" "$num" )
96 res=( "${res[@]}" "$duration" )
97 echo "RECOVERY TIME: NFILES=$nfiles number of clients: $num $duration"
# Advance to the next client count.
101 num=$((num + increment))
104 mdsrate_cleanup $num $machinefile $nfiles $dir 'f%%d' --ignore
# Summary: walk 'res', printing element i (client count) together with
# element i+1 (duration).
107 while [ $i -lt ${#res[@]} ]; do
108 echo "RECOVERY TIME: NFILES=$nfiles number of clients: ${res[i]} ${res[i+1]}"
# Hand test 3a to run_test with a description that includes CLIENTCOUNT.
113 run_test 3a "recovery time, $CLIENTCOUNT clients"
# Announce completion, then run the framework's cleanup helper.
115 equals_msg `basename $0`: test complete, cleaning up
116 check_and_cleanup_lustre
# Print the suite log if that file exists; the trailing '|| true' makes this
# line end with a zero status either way.
117 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG || true