3 # client failure does not affect other clients
5 # Start load on clients (each client works on its own directory).
6 # At defined (5-10 minutes) interval fail one random client and then fail mds.
7 # Reintegrate failed client after recovery completed,
8 # application errors are allowed for that client but not on other clients.
9 # 10 minute intervals and verify that no application errors occur.
11 # Test runs one of CLIENT_LOAD progs on remote clients.
# Locate the Lustre tree (defaults to the parent of this script's directory
# unless LUSTRE is already set) and source the shared test framework.
# Helpers used below such as skip_env and log are not defined in this file --
# presumably they come from test-framework.sh; confirm.
16 LUSTRE=${LUSTRE:-$(dirname $0)/..}
17 . $LUSTRE/tests/test-framework.sh
21 # bug number for skipped test:
22 ALWAYS_EXCEPT="$RECOVERY_RANDOM_SCALE_EXCEPT"
23 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# Environment prerequisites. Each check that fails calls skip_env with the
# reason shown in its message.
27 remote_mds_nodsh && skip_env "remote MDS with nodsh"
28 remote_ost_nodsh && skip_env "remote OST with nodsh"
# At least three clients are required (CLIENTCOUNT < 3 triggers skip_env).
30 [ $CLIENTCOUNT -lt 3 ] &&
31 skip_env "need three or more clients"
# SHARED_DIRECTORY must be non-empty and must pass check_shared_dir.
33 if [ -z "$SHARED_DIRECTORY" ] || ! check_shared_dir $SHARED_DIRECTORY; then
34 skip_env "SHARED_DIRECTORY should be specified with a shared directory \
35 which is accessible on all of the nodes"
# FAILURE_MODE=SOFT only produces a logged warning here; the script continues.
38 [[ $FAILURE_MODE = SOFT ]] && \
39 log "WARNING: $0 is not functional with FAILURE_MODE = SOFT, bz22797"
41 # Application failures are allowed for the failed client
42 # but not for other clients.
# Tunables. Every assignment uses ${VAR:-default}, so a value already present
# in the environment takes precedence over the default.
#
# DURATION (seconds): 30 minutes in the SLOW=no branch; the other default is
# 24 hours.
45 if [ "$SLOW" = "no" ]; then
46 DURATION=${DURATION:-$((60 * 30))}
48 DURATION=${DURATION:-$((60 * 60 * 24))}
# Target length of one fail/recover iteration, in seconds.
50 SERVER_FAILOVER_PERIOD=${SERVER_FAILOVER_PERIOD:-$((60 * 10))} # 10 minutes
# MINSLEEP: the main loop compares the time left in an iteration against this
# value; an iteration with less than MINSLEEP seconds left is counted as short.
52 MINSLEEP=${MINSLEEP:-120}
53 REQFAIL_PERCENT=${REQFAIL_PERCENT:-3} # bug17839 comment 62
# REQFAIL = ceil(DURATION * REQFAIL_PERCENT / (SERVER_FAILOVER_PERIOD * 100)),
# i.e. REQFAIL_PERCENT percent of the expected iteration count, rounded up.
# The main loop exits with status 6 once the short-iteration count exceeds it.
54 # round up the result of integer division: C=(A + (B - 1)) / B
55 REQFAIL=${REQFAIL:-$(((DURATION * REQFAIL_PERCENT + (SERVER_FAILOVER_PERIOD *
56 100 - 1 )) / SERVER_FAILOVER_PERIOD / 100))}
# END_RUN_FILE lives under SHARED_DIRECTORY; the main loop stops as soon as
# this file exists. The two pid files default to paths under $TMP.
58 END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
59 LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
60 VMSTAT_PID_FILE=${VMSTAT_PID_FILE:-$TMP/vmstat.pid}
# Walk every entry of MDTS and FAILED_CLIENTS. Both are comma-separated lists;
# ${VAR//,/ } turns the commas into spaces so the for loop splits them.
66 for facet in ${MDTS//,/ } ${FAILED_CLIENTS//,/ }; do
# Name of the per-node counter variable: <node_var_name of facet>_nums.
67 var=$(node_var_name $facet)_nums
# NOTE(review): the assignment of val is not visible here -- presumably it is
# the value of the variable named by var; confirm.
70 echo "$facet failed over $val times"
# Handler registered by test_fail_client_mds (via stack_trap) for EXIT and INT.
# Prints END_RUN_FILE when it is non-empty, appends this script's name to it,
# logs a run summary, stops vmstat and the client loads, and gathers logs when
# rc is non-zero.
# Globals read: END_RUN_FILE, DURATION, SERVER_FAILOVER_PERIOD, ELAPSED,
#               VMSTAT_PID_FILE, LOAD_PID_FILE, NODES_TO_USE
# NOTE(review): the assignments of rc and of the initial value of result are
# not visible here -- presumably rc holds the exit status at entry; confirm.
75 summary_and_cleanup () {
78 # Having not empty END_RUN_FILE means the failed loads only
79 if [ -s $END_RUN_FILE ]; then
80 print_end_run_file $END_RUN_FILE
# Append this script's own name ($0) to END_RUN_FILE. The main loop stops
# once this file exists; the grep -v $0 below filters the name back out.
84 echo $(date +'%F %H:%M:%S') Terminating clients loads ...
85 echo "$0" >> $END_RUN_FILE
# Any non-zero rc marks the run as FAIL.
87 [ $rc -eq 0 ] || result=FAIL
89 log "Duration: $DURATION
90 Server failover period: $SERVER_FAILOVER_PERIOD seconds
91 Exited after: $ELAPSED seconds
92 Number of failovers before exit:
94 Status: $result: rc=$rc"
96 # stop vmstat on OSS nodes
98 stop_process $(comma_list $(osts_nodes)) $VMSTAT_PID_FILE
100 # stop the client loads
101 stop_client_loads $NODES_TO_USE $LOAD_PID_FILE
# On failure, collect logs from all server nodes plus every entry of
# END_RUN_FILE other than lines matching this script's name.
103 if [ $rc -ne 0 ]; then
104 # we are interested in only on failed clients and servers
105 local failedclients=$(cat $END_RUN_FILE | grep -v $0)
106 gather_logs $(comma_list $(all_server_nodes) $failedclients)
112 ################################## Main Flow ###################################
113 check_and_setup_lustre
# Remove entries under $DIR whose names start with R, d, f or s followed by a
# digit. NOTE(review): presumably leftovers of earlier runs; confirm.
114 rm -rf $DIR/[Rdfs][0-9]*
# Cached here and reported in the "load period too short" warning in the
# main loop.
116 MAX_RECOV_TIME=$(max_recovery_time)
118 # The test node needs to be insulated from a lustre failure as much as possible,
119 # so not even loading the lustre modules is ideal.
121 # -- remove hostname from clients list
122 zconf_umount $HOSTNAME $MOUNT
# NODES_TO_USE defaults to CLIENTS; this host is then excluded from the list,
# so it never runs a load and is never picked as the client to fail.
123 NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
124 NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $HOSTNAME)
# Check the programs listed in the CLIENT_LOADS array on the chosen nodes.
126 check_progs_installed $NODES_TO_USE "${CLIENT_LOADS[@]}"
# MDS facets; one is picked at random for failover in each iteration.
128 MDTS=$(get_facets MDS)
130 # Fail a random client and then failover a random MDS.
#
# Flow: start vmstat and the client loads, then loop until ELAPSED reaches
# DURATION or END_RUN_FILE exists. Each iteration:
#   1. picks a random client from NODES_TO_USE and a random facet from MDTS;
#   2. checks client loads before the failover;
#   3. shuts the client down, fails over the MDS facet and waits for recovery;
#   4. boots, remounts and restarts the load on the failed client;
#   5. checks the loads on all other clients with ERRORS_OK cleared;
#   6. computes the remaining time in the period and counts the iteration as
#      short when it is below MINSLEEP.
#
# Exit statuses visible in this function:
#   3   reading LOAD_PID_FILE on the clients failed
#   4   check_client_loads failed before the failover
#   1   facet_failover failed
#   13  END_RUN_FILE exists but its first line does not match the failed
#       client (also reached if the rm of END_RUN_FILE fails)
#   6   more than REQFAIL iterations were short
#   zconf_mount / client_up / restart_client_loads failures exit with the
#   status of the failing command.
131 test_fail_client_mds() {
# Run summary_and_cleanup on EXIT and INT.
137 stack_trap summary_and_cleanup EXIT INT
139 # start vmstat on OSS nodes
141 start_vmstat $(comma_list $(osts_nodes)) $VMSTAT_PID_FILE
145 start_client_loads $NODES_TO_USE
147 echo client loads pids:
148 do_nodesv $NODES_TO_USE "cat $LOAD_PID_FILE" || exit 3
# Timestamps in seconds since the epoch; ELAPSED is derived from them at the
# end of each iteration.
154 local start_ts=$(date +%s)
155 local current_ts=$start_ts
# Keep iterating until the run time is used up or END_RUN_FILE appears.
157 while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
158 # In order to perform the
159 # expected number of failovers, we need to account the
161 # 1) the time that has elapsed during the client load checking
162 # 2) time takes for failover
163 it_time_start=$(date +%s)
# Pick this iteration's client; client_var names its failover counter.
165 fail_client=$(get_random_entry $NODES_TO_USE)
166 client_var=$(node_var_name $fail_client)_nums
168 # store the list of failed clients
169 # lists are comma separated
170 FAILED_CLIENTS=$(expand_list $FAILED_CLIENTS $fail_client)
# Pick this iteration's MDS facet; var names its failover counter.
172 serverfacet=$(get_random_entry $MDTS)
173 var=$(node_var_name $serverfacet)_nums
175 # Check that our client loads are still running. If any have
176 # died, that means they have died outside of recovery, which
178 log "==== Checking clients loads BEFORE failover -- failure NOT OK \
179 ELAPSED=$ELAPSED DURATION=$DURATION \
180 PERIOD=$SERVER_FAILOVER_PERIOD"
181 check_client_loads $NODES_TO_USE || exit 4
183 log "FAIL CLIENT $fail_client..."
184 shutdown_client $fail_client
186 log "Starting failover on $serverfacet"
187 facet_failover "$serverfacet" || exit 1
189 if ! wait_recovery_complete $serverfacet; then
190 echo "$serverfacet recovery is not completed!"
# Bring the failed client back: boot it, remount, and wait for it to be up.
194 boot_node $fail_client
195 echo "Reintegrating $fail_client"
196 zconf_mount $fail_client $MOUNT || exit $?
197 client_up $fail_client || exit $?
199 # Increment the number of failovers
# ${!client_var} is indirect expansion: read the counter whose name is stored
# in client_var, add one, then write it back through eval.
202 val=$((${!client_var} + 1))
203 eval $client_var=$val
205 # load script on failed clients could create END_RUN_FILE
206 # We should remove it and ignore the failure if this
207 # file contains the failed client only.
208 # We can not use ERRORS_OK when start all loads at the start of
209 # this script because the application errors allowed for random
210 # failed client only, but not for all clients.
211 if [ -e $END_RUN_FILE ]; then
# Only the first line of END_RUN_FILE is compared with the failed client.
213 read end_run_node < $END_RUN_FILE
214 [[ $end_run_node = $fail_client ]] &&
215 rm -f $END_RUN_FILE || exit 13
# Restart the load on the reintegrated client, passing ERRORS_OK through.
218 restart_client_loads $fail_client $ERRORS_OK || exit $?
220 # Check that not failed clients loads are still running.
221 # No application failures should occur on clients that were
223 log "==== Checking clients loads AFTER failed client reintegrated \
225 if ! ERRORS_OK= check_client_loads \
226 $(exclude_items_from_list $NODES_TO_USE $fail_client); then
227 log "Client load failed. Exiting..."
231 current_ts=$(date +%s)
232 ELAPSED=$((current_ts - start_ts))
233 sleep=$((SERVER_FAILOVER_PERIOD - (current_ts - it_time_start)))
235 # Keep counting the number of iterations when
236 # time spent to failover and two client loads check exceeded
237 # the value ( SERVER_FAILOVER_PERIOD - MINSLEEP ).
238 if [ $sleep -lt $MINSLEEP ]; then
239 reqfail=$((reqfail + 1))
240 log "WARNING: failover, client reintegration and \
241 check_client_loads time exceeded SERVER_FAILOVER_PERIOD - MINSLEEP!
242 Failed to load the filesystem with I/O for a minimum period of \
243 $MINSLEEP $reqfail times ( REQFAIL=$REQFAIL ).
244 This iteration, the load was only applied for sleep=$sleep seconds.
245 Estimated max recovery time : $MAX_RECOV_TIME
246 Probably the hardware is taking excessively long time to boot.
247 Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), \
249 [ $reqfail -gt $REQFAIL ] && exit 6
252 log "Number of failovers:
253 $(numfailovers) and counting..."
255 [ $((ELAPSED + sleep)) -ge $DURATION ] && break
257 if [ $sleep -gt 0 ]; then
258 echo "sleeping $sleep seconds... "
# Hand the test to run_test with its description.
# NOTE(review): presumably run_test maps fail_client_mds to the
# test_fail_client_mds function defined in this file; confirm.
264 run_test fail_client_mds "fail client, then failover MDS"
# Remount the filesystem on this node, which was unmounted before the test
# started, and report an error if the mount or client start fails.
266 zconf_mount $HOSTNAME $MOUNT || error "mount $MOUNT on $HOSTNAME failed"
267 client_up || error "start client on $HOSTNAME failed"
270 check_and_cleanup_lustre