Whamcloud - gitweb
LU-13275 tests: enhance racer with failover
[fs/lustre-release.git] / lustre / tests / racer.sh
#!/bin/bash
#
# Racer test driver: sources the Lustre test framework and the cluster
# configuration, then (further down) registers and runs test_1.
#
# Command-line arguments are forwarded to init_test_env and, unless ONLY is
# already set, also become the value of ONLY.
# LUSTRE defaults to the parent of the directory holding this script.
#set -vx
set -e

ONLY=${ONLY:-"$*"}
# Quote every path component so an install path containing whitespace or
# glob characters is not re-split.
LUSTRE=${LUSTRE:-$(cd "$(dirname "$0")/.."; echo "$PWD")}
. "$LUSTRE/tests/test-framework.sh"
# "$@" keeps each argument as a separate word; a bare $@ would re-split
# arguments that contain whitespace.
init_test_env "$@"
# CONFIG is assigned here (:=) when the caller did not provide one.
. "${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}"
init_logging
11
# Path of the racer workload script that test_1 launches on the clients.
racer=$LUSTRE/tests/racer/racer.sh
echo racer: $racer with $MDSCOUNT MDTs

# A DURATION supplied by the caller always wins; otherwise the default is
# 300 when SLOW is exactly "no" and 900 in every other case.
case "$SLOW" in
no)
	: "${DURATION:=300}"
	;;
*)
	: "${DURATION:=900}"
	;;
esac

# NOTE(review): presumably requests a second client mount from the
# framework (RACERDIRS below defaults to "$DIR $DIR2") - confirm.
: "${MOUNT_2:=yes}"
21
# Framework hooks; both are defined outside this file.
build_test_filter
check_and_setup_lustre

# Nodes that run racer: the local host unless CLIENTS is already set.
: "${CLIENTS:=$HOSTNAME}"
# Whitespace-separated list of mount points to race in.
: "${RACERDIRS:=$DIR $DIR2}"
echo RACERDIRS=$RACERDIRS
28
# Failover knobs, each overridable from the environment:
#   RACER_FAILOVER         - "true"/"false"; the value is executed as a command
#   FAIL_TARGETS           - whitespace-separated target types given to get_facets
#   RACER_FAILOVER_PERIOD  - argument to sleep between failovers
: "${RACER_FAILOVER:=false}"
: "${FAIL_TARGETS:=MDS OST}"
: "${RACER_FAILOVER_PERIOD:=60}"

if $RACER_FAILOVER; then
	# Collect whatever get_facets prints for every requested target type;
	# fail_random_facet later picks its victim from this array.
	declare -a victims
	for target in $FAIL_TARGETS; do
		victims+=($(get_facets $target))
	done
	echo Victim facets ${victims[@]}
fi
40
#LU-4684
# Migration is forced off here, so the ":=" defaults applied to
# RACER_ENABLE_MIGRATION further down never change it and a value supplied
# through the environment is overridden.
RACER_ENABLE_MIGRATION=false

# Remote and striped directories are enabled by default only with more than
# one MDT and a new enough MDS1.
if ((MDSCOUNT > 1 && "$MDS1_VERSION" >= $(version_code 2.8.0))); then
	: "${RACER_ENABLE_REMOTE_DIRS:=true}"
	: "${RACER_ENABLE_STRIPED_DIRS:=true}"
	: "${RACER_ENABLE_MIGRATION:=true}"
elif ((MDSCOUNT > 1 && "$MDS1_VERSION" >= $(version_code 2.5.0))); then
	: "${RACER_ENABLE_REMOTE_DIRS:=true}"
fi

# Snapshots stay enabled only when MDS1 is at least 2.9.54 and
# "facet_fstype mgs" reports zfs.
if [[ "$MDS1_VERSION" -lt $(version_code 2.9.54) ||
      $(facet_fstype mgs) != zfs ]]; then
	RACER_ENABLE_SNAPSHOT=false
fi

# PFL is switched off for MDS1 versions up to and including 2.9.55.
if [[ "$MDS1_VERSION" -le $(version_code 2.9.55) ]]; then
	RACER_ENABLE_PFL=false
fi

# DOM is switched off for MDS1 versions up to and including 2.10.53.
if [[ "$MDS1_VERSION" -le $(version_code 2.10.53) ]]; then
	RACER_ENABLE_DOM=false
fi

# FLR is switched off for MDS1 versions older than 2.10.55.
if [[ "$MDS1_VERSION" -lt $(version_code 2.10.55) ]]; then
	RACER_ENABLE_FLR=false
fi

# Fallbacks for every switch that is still unset or empty at this point.
: "${RACER_ENABLE_REMOTE_DIRS:=false}"
: "${RACER_ENABLE_STRIPED_DIRS:=false}"
: "${RACER_ENABLE_MIGRATION:=false}"
: "${RACER_ENABLE_SNAPSHOT:=true}"
: "${RACER_ENABLE_PFL:=true}"
: "${RACER_ENABLE_DOM:=true}"
: "${RACER_ENABLE_FLR:=true}"
71
# Fail over one randomly chosen victim every RACER_FAILOVER_PERIOD until the
# racer_done marker file appears.
#
# Globals:
#   victims (read)               - array of facets that may be failed over
#   RACER_FAILOVER_PERIOD (read) - argument to sleep before each failover
#   racer_done (read)            - path of the marker file; not defined here,
#                                  it is a local of the caller (test_1) and is
#                                  visible through bash's dynamic scoping
# Returns:
#   non-zero, without failing anything over, when victims is empty
fail_random_facet () {
	local facets
	local facet

	# With nothing to pick from, the loop below would keep calling
	# facet_failover with an empty facet name; report that instead.
	if (( ${#victims[@]} == 0 )); then
		echo "fail_random_facet: no victim facets to fail over" >&2
		return 1
	fi

	# get_random_entry is handed one comma-separated list
	facets="${victims[*]}"
	facets=${facets// /,}

	sleep "$RACER_FAILOVER_PERIOD"
	while [ ! -f "$racer_done" ]; do
		facet=$(get_random_entry "$facets")
		facet_failover "$facet"
		sleep "$RACER_FAILOVER_PERIOD"
	done
}
83
# run racer
#
# Start one racer instance per racer directory on all clients, optionally
# with the random failover loop and the snapshot create/destroy scripts
# running alongside, then wait for everything to finish.
#
# Globals (read):
#   CLIENTS, RACERDIRS, MDSCOUNT, OSTCOUNT, DURATION, TMP, LUSTRE, LFS, LCTL,
#   racer, NUM_RACER_THREADS, RACER_MAX_CLEANUP_WAIT, RACER_FAILOVER,
#   RACER_ENABLE_{REMOTE_DIRS,STRIPED_DIRS,MIGRATION,PFL,DOM,FLR,SNAPSHOT}
# Returns:
#   the number of racer instances that exited non-zero, plus the exit status
#   of the failover loop when RACER_FAILOVER is enabled
test_1() {
	local rrc=0
	local rc
	local clients=$CLIENTS
	local -a rdirs=()
	local -a rpids=()
	local -a lss_pids=()
	local failpid=""
	local d rdir pid
	local i
	# fail_random_facet polls for this file; it reads the variable through
	# dynamic scoping, so the name must stay racer_done.
	local racer_done=$TMP/racer_done

	rm -f "$racer_done"

	# RACERDIRS is a whitespace-separated list, hence the unquoted expansion
	for d in $RACERDIRS; do
		is_mounted "$d" || continue

		rdirs+=("$d/racer")
		mkdir -p "$d/racer"
	#	lfs setstripe $d/racer -c -1
		if [ "$MDSCOUNT" -ge 2 ]; then
			# one extra racer directory per additional MDT index
			for i in $(seq $((MDSCOUNT - 1))); do
				rdirs+=("$d/racer$i")
				if [ ! -e "$d/racer$i" ]; then
					$LFS mkdir -i "$i" "$d/racer$i" ||
						error "lfs mkdir $i failed"
				fi
			done
		fi
	done

	for rdir in "${rdirs[@]}"; do
		do_nodes "$clients" "DURATION=$DURATION \
			MDSCOUNT=$MDSCOUNT OSTCOUNT=$OSTCOUNT \
			RACER_ENABLE_REMOTE_DIRS=$RACER_ENABLE_REMOTE_DIRS \
			RACER_ENABLE_STRIPED_DIRS=$RACER_ENABLE_STRIPED_DIRS \
			RACER_ENABLE_MIGRATION=$RACER_ENABLE_MIGRATION \
			RACER_ENABLE_PFL=$RACER_ENABLE_PFL \
			RACER_ENABLE_DOM=$RACER_ENABLE_DOM \
			RACER_ENABLE_FLR=$RACER_ENABLE_FLR \
			RACER_MAX_CLEANUP_WAIT=$RACER_MAX_CLEANUP_WAIT \
			LFS=$LFS \
			LCTL=$LCTL \
			$racer $rdir $NUM_RACER_THREADS" &
		rpids+=($!)
	done

	if $RACER_FAILOVER; then
		fail_random_facet &
		failpid=$!
		echo racers failpid: $failpid
	fi

	if $RACER_ENABLE_SNAPSHOT; then
		lss_gen_conf

		"$LUSTRE/tests/racer/lss_create.sh" &
		lss_pids+=($!)

		"$LUSTRE/tests/racer/lss_destroy.sh" &
		lss_pids+=($!)
	fi

	echo racers pids: "${rpids[@]}"
	for pid in "${rpids[@]}"; do
		# "wait || rc=$?" records the status even when errexit is active;
		# a bare failing wait would end the shell before rc is read.
		rc=0
		wait "$pid" || rc=$?
		echo "pid=$pid rc=$rc"
		if [ "$rc" != 0 ]; then
			rrc=$((rrc + 1))
		fi
	done

	if $RACER_FAILOVER; then
		# tell fail_random_facet to stop, then collect its status
		touch "$racer_done"
		rc=0
		wait "$failpid" || rc=$?
		rrc=$((rrc + rc))
		# do not leave the marker file behind in $TMP
		rm -f "$racer_done"
	fi

	if $RACER_ENABLE_SNAPSHOT; then
		# killall exits non-zero when no matching process is left; that
		# is not a test failure.
		killall -q lss_create.sh || true
		killall -q lss_destroy.sh || true

		for pid in "${lss_pids[@]}"; do
			# the helpers were just killed, so their status is ignored
			wait "$pid" || true
		done

		lss_cleanup
	fi

	return $rrc
}
# Hand test_1 to the framework together with its description.
run_test 1 "racer on clients: ${CLIENTS:-$(hostname)} DURATION=$DURATION"

# Framework epilogue (all three are defined outside this file); complete is
# given bash's SECONDS counter.
complete "$SECONDS"
check_and_cleanup_lustre
exit_status