# Keep the caller's PTLDEBUG, otherwise default it to -1.
5 PTLDEBUG=${PTLDEBUG:--1}
# Locate the Lustre tree relative to this script unless LUSTRE is preset,
# then source the shared test framework from it.
9 LUSTRE=${LUSTRE:-$(dirname $0)/..}
10 . $LUSTRE/tests/test-framework.sh
# Seed ALWAYS_EXCEPT from LARGE_SCALE_EXCEPT (presumably the list of tests
# the framework skips -- confirm in test-framework.sh).
14 ALWAYS_EXCEPT="$LARGE_SCALE_EXCEPT "
# Preconditions: skip when remote_mds_nodsh succeeds, when CLIENTS is empty,
# or when CLIENTCOUNT is below 2.
18 remote_mds_nodsh && skip "remote MDS with nodsh"
20 [ -z "$CLIENTS" ] && skip_env "$TESTSUITE: Need two or more clients"
21 [ $CLIENTCOUNT -lt 2 ] &&
22 skip_env "$TESTSUITE: Need 2+ clients, have only $CLIENTCOUNT"
# Run the framework setup helper, then remove entries directly under $DIR
# whose names start with 'd' or 'f' followed by a digit.
26 check_and_setup_lustre
27 rm -rf $DIR/[df][0-9]*
# NOTE(review): get_mpiuser_id presumably sets MPI_USER_UID / MPI_USER_GID,
# which the next lines read -- confirm in test-framework.sh.
29 get_mpiuser_id $MPI_USER
# Command prefix used to run as the MPI user; a preset MPI_RUNAS wins.
30 MPI_RUNAS=${MPI_RUNAS:-"runas -u $MPI_USER_UID -g $MPI_USER_GID"}
# GSS_KRB5 is executed as a command; only when it succeeds is
# refresh_krb5_tgt called with the MPI user's uid, gid and runas prefix.
31 $GSS_KRB5 && refresh_krb5_tgt $MPI_USER_UID $MPI_USER_GID $MPI_RUNAS
# When DAEMONFILE is non-empty, start "$LCTL debug_daemon" with DAEMONFILE
# and DAEMONSIZE as its arguments.
33 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
# Visible part of the test body that is registered further down as test 3a
# ("recovery time"). It starts an mdsrate create workload over a growing
# number of clients, fails over $SINGLEMDS, and records the
# recovery_duration line reported for that MDS.
# NOTE(review): the function header, the initialisation of num and i, and
# the closing done/fi keywords are not visible in this listing.
36 assert_env CLIENTS MDSRATE MPIRUN
# Split the comma-separated CLIENTS list into an array of node names.
38 local -a nodes=(${CLIENTS//,/ })
39 # INCREMENT is the number of clients added per step; half of the clients by default
40 local increment=${INCREMENT:-$(( CLIENTCOUNT / 2 ))}
# LOG receives a copy of the workload output through tee below.
42 local LOG=$TMP/${TESTSUITE}_$tfile
# Indirect lookup: var names the variable "<SINGLEMDS>_svc" and ${!var}
# reads its value to build the recovery_status parameter pattern.
43 local var=${SINGLEMDS}_svc
44 local procfile="*.${!var}.recovery_status"
# Tunables with defaults: 3 iterations, 50000 files, 3 threads per client.
45 local iters=${ITERS:-3}
46 local nfiles=${NFILES:-50000}
47 local nthreads=${THREADS_PER_CLIENT:-3}
48 local IFree=$(inodes_available)
52 local dir=$DIR/d0.$TESTNAME
# Cap nfiles at the value returned by inodes_available.
54 [ $IFree -gt $nfiles ] || nfiles=$IFree
# Outer loop: num is the number of participating clients and grows by
# increment until it exceeds CLIENTCOUNT.
59 while [ $num -le $CLIENTCOUNT ]; do
# Comma-separated list of the first num client nodes.
60 list=$(comma_list ${nodes[@]:0:$num})
62 generate_machine_file $list $MACHINEFILE ||
63 error "can not generate machinefile"
# Repeat the measurement iters times for this client count.
# NOTE(review): the mdsrate_cleanup call below ends in a line continuation
# whose following line is not visible in this listing.
65 for i in $(seq $iters); do
66 mdsrate_cleanup $num $MACHINEFILE $nfiles $dir 'f%%d' \
# Build the mdsrate create command and launch it in the background (trailing
# &) with num * nthreads MPI processes; output is copied to LOG via tee.
69 COMMAND="${MDSRATE} --create --nfiles $nfiles --dir
70 $dir --filefmt 'f%%d'"
71 mpi_run ${MACHINEFILE_OPTION} $MACHINEFILE \
72 -np $((num * nthreads)) ${COMMAND} | tee ${LOG}&
77 # 2 threads 100000 creates 117 secs
# Fail over the MDS, then call wait_recovery_complete with TIMEOUT * 10
# (presumably the maximum wait in seconds -- confirm).
80 log "$i : Starting failover on $SINGLEMDS"
81 facet_failover $SINGLEMDS
82 if ! wait_recovery_complete $SINGLEMDS \
83 $((TIMEOUT * 10)); then
84 echo "$SINGLEMDS recovery is not completed!"
# Read the recovery_status parameter on the MDS and keep only the line(s)
# containing recovery_duration.
89 duration=$(do_facet $SINGLEMDS lctl get_param -n \
90 $procfile | grep recovery_duration)
# res is a flat list of pairs: client count followed by its duration line.
92 res=( "${res[@]}" "$num" )
93 res=( "${res[@]}" "$duration" )
94 echo "RECOVERY TIME: NFILES=$nfiles number of clients: $num $duration"
97 num=$((num + increment))
# Cleanup call with --ignore passed as the trailing argument.
100 mdsrate_cleanup $num $MACHINEFILE $nfiles $dir 'f%%d' --ignore
# Print the collected results again; res holds pairs, so entries i and i+1
# are printed together.
103 while [ $i -lt ${#res[@]} ]; do
104 echo "RECOVERY TIME: NFILES=$nfiles number of clients: ${res[i]} ${res[i+1]}"
# Hand test 3a to the framework's run_test helper; the description string
# includes the current CLIENTCOUNT.
109 run_test 3a "recovery time, $CLIENTCOUNT clients"
# Final framework call of the script (presumably verifies and tears down
# the filesystem -- confirm in test-framework.sh).
112 check_and_cleanup_lustre