# Suite setup: load the test framework, apply the skip conditions, then
# prepare the filesystem and the MPI user before the test is registered.
# PTLDEBUG falls back to -1 when it is unset or empty.
5 PTLDEBUG=${PTLDEBUG:--1}
# LUSTRE defaults to the parent of this script's directory. The framework
# sourced from it presumably provides the helpers called below (skip,
# check_and_setup_lustre, ...), none of which are defined in the lines shown.
9 LUSTRE=${LUSTRE:-$(dirname $0)/..}
10 . $LUSTRE/tests/test-framework.sh
# Seed ALWAYS_EXCEPT from LARGE_SCALE_EXCEPT (presumably the list of tests the
# framework excludes -- confirm against test-framework.sh).
14 ALWAYS_EXCEPT="$LARGE_SCALE_EXCEPT "
# Skip when remote_mds_nodsh returns success.
18 remote_mds_nodsh && skip "remote MDS with nodsh"
# The suite needs at least two clients; with fewer, skip_env is called.
20 [ $CLIENTCOUNT -lt 2 ] &&
21 skip_env "$TESTSUITE: Need 2+ clients, have only $CLIENTCOUNT"
# Set up the filesystem, then remove entries under $DIR whose names start
# with 'd' or 'f' followed by a digit.
25 check_and_setup_lustre
26 rm -rf $DIR/[df][0-9]*
# Look up the MPI user (presumably sets MPI_USER_UID / MPI_USER_GID, which are
# used on the next line -- confirm), default MPI_RUNAS to a runas invocation
# with those ids, and call refresh_krb5_tgt only when the command stored in
# GSS_KRB5 succeeds.
28 get_mpiuser_id $MPI_USER
29 MPI_RUNAS=${MPI_RUNAS:-"runas -u $MPI_USER_UID -g $MPI_USER_GID"}
30 $GSS_KRB5 && refresh_krb5_tgt $MPI_USER_UID $MPI_USER_GID $MPI_RUNAS
# Start the lctl debug daemon only when DAEMONFILE is non-empty.
32 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
# Body of the recovery-time test that is registered further down as 3a.
# For a growing number of clients it starts an mdsrate create job, fails over
# $SINGLEMDS, waits for recovery, and records the reported recovery duration.
# NOTE(review): the function header, the initial value of num, and the closing
# done/fi keywords are not among the lines shown here.
# presumably verifies that these variables are set -- confirm in the framework
35 assert_env CLIENTS MDSRATE MPIRUN
# Split the comma-separated CLIENTS list into an array of node names.
37 local -a nodes=(${CLIENTS//,/ })
38 # INCREMENT is the number of clients added per step; defaults to half of CLIENTCOUNT
39 local increment=${INCREMENT:-$(( CLIENTCOUNT / 2 ))}
41 local LOG=$TMP/${TESTSUITE}_$tfile
# var holds the NAME "<SINGLEMDS>_svc"; ${!var} expands to that variable's
# value, which forms the middle component of the recovery_status parameter.
42 local var=${SINGLEMDS}_svc
43 local procfile="*.${!var}.recovery_status"
# Tunables with defaults: ITERS, NFILES and THREADS_PER_CLIENT.
44 local iters=${ITERS:-3}
45 local nfiles=${NFILES:-50000}
46 local nthreads=${THREADS_PER_CLIENT:-3}
47 local IFree=$(inodes_available)
51 local dir=$DIR/d0.$TESTNAME
# Cap nfiles at IFree: unless IFree is greater than nfiles, use IFree instead.
53 [ $IFree -gt $nfiles ] || nfiles=$IFree
# Outer loop: run while num does not exceed CLIENTCOUNT, using the first
# num entries of nodes for each round.
58 while [ $num -le $CLIENTCOUNT ]; do
59 list=$(comma_list ${nodes[@]:0:$num})
61 generate_machine_file $list $MACHINEFILE ||
62 error "can not generate machinefile"
# Inner loop: iters rounds per client count.
64 for i in $(seq $iters); do
# Each round calls mdsrate_cleanup first (its continuation lines are not
# shown), then launches the create job below in the background.
65 mdsrate_cleanup $num $MACHINEFILE $nfiles $dir 'f%%d' \
68 COMMAND="${MDSRATE} --create --nfiles $nfiles --dir
69 $dir --filefmt 'f%%d'"
70 mpi_run ${MACHINEFILE_OPTION} $MACHINEFILE \
71 -np $((num * nthreads)) ${COMMAND} | tee ${LOG}&
76 # 2 threads 100000 creates 117 secs
# Fail over the MDS while the create job runs, then wait for recovery with
# TIMEOUT * 10 as the second argument (presumably a limit in seconds -- confirm).
79 log "$i : Starting failover on $SINGLEMDS"
80 facet_failover $SINGLEMDS
81 if ! wait_recovery_complete $SINGLEMDS \
82 $((TIMEOUT * 10)); then
83 echo "$SINGLEMDS recovery is not completed!"
# Read the recovery_status parameter on the MDS facet and keep only the
# line(s) containing recovery_duration.
88 duration=$(do_facet $SINGLEMDS lctl get_param -n \
89 $procfile | grep recovery_duration)
# res is a flat array of (client count, duration) pairs.
91 res=( "${res[@]}" "$num" )
92 res=( "${res[@]}" "$duration" )
93 echo "RECOVERY TIME: NFILES=$nfiles number of clients: $num $duration"
# Advance to the next client count.
96 num=$((num + increment))
# Cleanup call that additionally passes --ignore.
99 mdsrate_cleanup $num $MACHINEFILE $nfiles $dir 'f%%d' --ignore
# Summary: print each recorded pair; res[i] is the client count and res[i+1]
# the matching duration (the statement advancing i is not shown).
102 while [ $i -lt ${#res[@]} ]; do
103 echo "RECOVERY TIME: NFILES=$nfiles number of clients: ${res[i]} ${res[i+1]}"
# Register test 3a with the framework's run_test; the description string
# includes the current client count.
108 run_test 3a "recovery time, $CLIENTCOUNT clients"
# Final call into the framework once the test has been registered/run
# (check_and_cleanup_lustre is not defined in the lines shown).
111 check_and_cleanup_lustre