Whamcloud - gitweb
LU-15626 tests: Fix "error" reported by shellcheck(2)
[fs/lustre-release.git] / lustre / tests / large-scale.sh
#!/bin/bash
#
# Suite preamble: load the Lustre test framework, check that this suite can
# run in the current configuration, and prepare the filesystem and the MPI
# user before the individual tests are defined.

set -e

# Defaults; each may be overridden from the caller's environment.
PTLDEBUG=${PTLDEBUG:--1}
SETUP=${SETUP:-""}
CLEANUP=${CLEANUP:-""}

# Locate the Lustre tree relative to this script unless LUSTRE is preset.
# Quoted so that a checkout path containing whitespace still works.
LUSTRE=${LUSTRE:-$(dirname "$0")/..}
. "$LUSTRE/tests/test-framework.sh"
init_test_env "$@"
init_logging

# The trailing space is part of the original value and is kept as-is.
ALWAYS_EXCEPT="$LARGE_SCALE_EXCEPT "

build_test_filter

remote_mds_nodsh && skip "remote MDS with nodsh"

[ "$CLIENTCOUNT" -lt 2 ] &&
	skip_env "$TESTSUITE: Need 2+ clients, have only $CLIENTCOUNT"

MOUNT_2=""

check_and_setup_lustre
# Remove leftovers named d<digit>* / f<digit>* from earlier runs.
# ${DIR:?} aborts the script instead of globbing under "/" should DIR
# ever be unset or empty.
rm -rf "${DIR:?}"/[df][0-9]*

get_mpiuser_id $MPI_USER
MPI_RUNAS=${MPI_RUNAS:-"runas -u $MPI_USER_UID -g $MPI_USER_GID"}
# GSS_KRB5 is executed as a command (presumably "true" or "false" -- confirm
# in test-framework.sh).  MPI_RUNAS is deliberately unquoted so that it
# expands to separate words.
$GSS_KRB5 && refresh_krb5_tgt $MPI_USER_UID $MPI_USER_GID $MPI_RUNAS

# Start the lctl debug daemon only when a DAEMONFILE was requested.
# DAEMONSIZE is left unquoted so that an empty value adds no argument.
[ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE

# test_3a: record MDS recovery time while an mdsrate create load is running.
#
# The client count starts at $increment and grows by $increment until it
# exceeds CLIENTCOUNT.  For every client count, $iters rounds are run; each
# round:
#   1. unlinks files left by the previous round (mdsrate_cleanup --ignore),
#   2. starts "mdsrate --create" in the background through mpi_run,
#   3. sleeps 20 seconds, then fails over $SINGLEMDS,
#   4. waits for recovery (exits the script with status 7 on timeout),
#   5. reads the recovery_duration line from the MDS recovery_status.
# All (client count, duration) pairs are printed again at the end.
#
# Environment:
#   CLIENTS, MDSRATE, MPIRUN  required (checked with assert_env)
#   INCREMENT            client-count step; default CLIENTCOUNT / 2
#   ITERS                rounds per client count; default 3
#   NFILES               files per round; default 50000, lowered to the
#                        value printed by inodes_available if that is smaller
#   THREADS_PER_CLIENT   multiplier for the mpi_run -np value; default 3
test_3a() {
	assert_env CLIENTS MDSRATE MPIRUN

	# CLIENTS is comma-separated; word splitting here is intentional.
	local -a nodes=(${CLIENTS//,/ })
	# INCREMENT is a number of clients, half of the clients by default
	local increment=${INCREMENT:-$(( CLIENTCOUNT / 2 ))}
	local num=$increment
	local LOG=$TMP/${TESTSUITE}_$tfile
	local var=${SINGLEMDS}_svc
	local procfile="*.${!var}.recovery_status"
	local iters=${ITERS:-3}
	local nfiles=${NFILES:-50000}
	local nthreads=${THREADS_PER_CLIENT:-3}
	local IFree
	local pid
	local list
	local cmd
	local duration
	local i
	local -a res=()
	local dir=$DIR/d0.$TESTNAME

	# A step of zero (or less) would never let the loop below terminate.
	[[ $increment -gt 0 ]] ||
		error "INCREMENT must be a positive integer, got '$increment'"

	# Assigned separately from "local" so a failure is not masked.
	IFree=$(inodes_available) ||
		error "can not get the number of available inodes"

	[[ $IFree -gt $nfiles ]] || nfiles=$IFree

	mkdir -p "$dir"
	chmod 0777 "$dir"

	while (( num <= CLIENTCOUNT )); do
		list=$(comma_list "${nodes[@]:0:$num}")

		generate_machine_file "$list" "$MACHINEFILE" ||
			error "can not generate machinefile"

		for ((i = 1; i <= iters; i++)); do
			mdsrate_cleanup "$num" "$MACHINEFILE" "$nfiles" \
				"$dir" 'f%%d' --ignore

			# The single quotes around f%%d are part of the string
			# and are passed on literally; ${cmd} is expanded
			# unquoted below so it splits into separate arguments.
			cmd="${MDSRATE} --create --nfiles $nfiles --dir"
			cmd="$cmd $dir --filefmt 'f%%d'"
			mpi_run ${MACHINEFILE_OPTION} $MACHINEFILE \
				-np $((num * nthreads)) ${cmd} | tee "$LOG" &

			# $! is the last process of the background pipeline
			pid=$!
			echo "pid=$pid"

			# 2 threads 100000 creates 117 secs
			sleep 20

			log "$i : Starting failover on $SINGLEMDS"
			facet_failover $SINGLEMDS
			if ! wait_recovery_complete $SINGLEMDS \
			     $((TIMEOUT * 10)); then
				echo "$SINGLEMDS recovery is not completed!"
				kill -9 $pid
				exit 7
			fi

			# procfile holds a '*' wildcard; quote it so it is not
			# expanded against the local working directory.
			duration=$(do_facet $SINGLEMDS lctl get_param -n \
				"$procfile" | grep recovery_duration)

			res+=("$num" "$duration")
			echo "RECOVERY TIME: NFILES=$nfiles number of clients: $num $duration"
			wait $pid
		done
		num=$((num + increment))
	done

	# NOTE(review): num has already been advanced past CLIENTCOUNT by the
	# loop above, so this runs with a larger value than any round used --
	# confirm this is intended.
	mdsrate_cleanup "$num" "$MACHINEFILE" "$nfiles" "$dir" 'f%%d' --ignore

	# res holds consecutive (client count, duration) pairs.
	for ((i = 0; i < ${#res[@]}; i += 2)); do
		echo "RECOVERY TIME: NFILES=$nfiles number of clients: ${res[i]}  ${res[i+1]}"
	done
}

# Hand test 3a to the framework's run_test; the description records how
# many clients take part.
run_test 3a "recovery time, $CLIENTCOUNT clients"

# Suite epilogue: pass the elapsed time (bash's SECONDS counter) to
# complete, clean up the Lustre setup, and finish with exit_status.
complete "${SECONDS}"
check_and_cleanup_lustre
exit_status