4 # For a duration of 24 hours, repeatedly fail over a random MDS at
5 # 10-minute intervals and verify that no application errors occur.
7 # The test runs one of the CLIENT_LOADS programs on remote clients.
# Locate the Lustre tree: honor a preset LUSTRE, otherwise fall back to the
# parent of the directory containing this script ($0).
12 LUSTRE=${LUSTRE:-$(dirname $0)/..}
# Source the common test framework first, then the recovery-scale helper
# library, both from $LUSTRE/tests.
13 . $LUSTRE/tests/test-framework.sh
14 . $LUSTRE/tests/recovery-scale-lib.sh
18 # bug number for skipped test:
19 ALWAYS_EXCEPT="$RECOVERY_MDS_SCALE_EXCEPT "
20 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
24 remote_mds_nodsh && skip_env "remote MDS with nodsh"
26 if (( CLIENTCOUNT < 3 )); then
27 skip_env "need three or more clients"
30 # SHARED_DIRECTORY should be set to a shared directory that is
31 # accessible on all of the nodes
32 if [[ -z "$SHARED_DIRECTORY" ]] || ! check_shared_dir "$SHARED_DIRECTORY"; then
33 skip_env "SHARED_DIRECTORY not set"
36 ERRORS_OK="" # No application failures should occur during this test.
# Run the framework's setup check, then delete any entries directly under
# $DIR whose names start with R, d, f or s followed by a digit
# (presumably leftovers from earlier runs -- TODO confirm).
38 check_and_setup_lustre
39 rm -rf $DIR/[Rdfs][0-9]*
# NOTE(review): presumably verifies that every program listed in CLIENT_LOADS
# is available on the nodes in NODES_TO_USE -- confirm against the helper.
42 check_progs_installed $NODES_TO_USE "${CLIENT_LOADS[@]}"
# Record the value reported by max_recovery_time and the facet lists for the
# MDS and OST types; MDTS and OSTS are passed to run_info below.
44 MAX_RECOV_TIME=$(max_recovery_time)
45 MDTS=$(get_facets MDS)
46 OSTS=$(get_facets OST)
48 # Print information about the settings for this run: failover period,
# duration, minimum sleep, SLOW and REQFAIL values, the shared directory,
# the end-run / load-PID / vmstat-PID files, the client count, and the
# MDS and OST facet lists.
# The arguments are unquoted, so an empty or multi-word value shifts the
# positions seen by run_info.
49 run_info $SERVER_FAILOVER_PERIOD $DURATION $MINSLEEP $SLOW $REQFAIL \
50 $SHARED_DIRECTORY $END_RUN_FILE $LOAD_PID_FILE $VMSTAT_PID_FILE \
51 $CLIENTCOUNT $MDTS $OSTS
54 # failover a random MDS
# Hand the failover_mds test to the framework runner with the description
# "failover MDS".
57 run_test failover_mds "failover MDS"
# Mount $MOUNT on the local host and confirm the client is up; a failure of
# either step is reported through error().
# NOTE(review): presumably needed because the test leaves the local client
# unmounted -- confirm against the test body.
59 zconf_mount $HOSTNAME $MOUNT || error "mount $MOUNT on $HOSTNAME failed"
60 client_up || error "start client on $HOSTNAME failed"
# Final framework cleanup check for the filesystem.
63 check_and_cleanup_lustre