# Locate the Lustre tree: keep a caller-supplied LUSTRE, otherwise fall back
# to the parent of the directory that contains this script.
5 LUSTRE=${LUSTRE:-`dirname $0`/..}
# Source test-framework.sh from the tests directory of that tree.
6 . $LUSTRE/tests/test-framework.sh
# Source the configuration file. The ':=' form also stores the default path
# in CONFIG when CONFIG is unset or empty.
10 . ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
# Defaults that a pre-set variable of the same name overrides.
# CLEANUP is run as a command at the end of the script and FORCE is passed
# to 'stop'; SETUP is presumably the matching setup command (its use is not
# visible in these lines -- confirm).
12 SETUP=${SETUP:-"setup"}
13 CLEANUP=${CLEANUP:-"cleanup"}
14 FORCE=${FORCE:-"--force"}
18 add_mds mds --dev $MDSDEV --size $MDSSIZE
19 if [ ! -z "$mdsfailover_HOST" ]; then
20 add_mdsfailover mds --dev $MDSDEV --size $MDSSIZE
23 add_lov lov1 mds --stripe_sz $STRIPE_BYTES \
24 --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
25 add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE --failover
26 add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE --failover
27 add_client client mds --lov lov1 --path $MOUNT
35 # make sure we are using the primary MDS, so the config log will
36 # be able to clean up properly.
37 activemds=`facet_active mds`
38 if [ $activemds != "mds" ]; then
42 umount $MOUNT2 || true
47 stop ost ${FORCE} --dump $TMP/replay-dual-`hostname`.log
50 if [ "$ONLY" == "cleanup" ]; then
51 sysctl -w portals.debug=0
58 start ost --reformat $OSTLCONFARGS
59 start ost2 --reformat $OSTLCONFARGS
60 start mds $MDSLCONFARGS --reformat
61 grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
62 grep " $MOUNT2 " /proc/mounts || zconf_mount `hostname` $MOUNT2
64 # echo $TIMEOUT > /proc/sys/lustre/timeout
68 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
76 checkstat $MOUNT2/a || return 1
77 checkstat $MOUNT1/b || return 2
78 rm $MOUNT2/a $MOUNT1/b
79 checkstat $MOUNT1/a && return 3
80 checkstat $MOUNT2/b && return 4
84 run_test 1 "|X| simple create"
92 checkstat $MOUNT2/adir || return 1
94 checkstat $MOUNT2/adir && return 2
98 run_test 2 "|X| mkdir adir"
103 mkdir $MOUNT2/adir/bdir
106 checkstat $MOUNT2/adir || return 1
107 checkstat $MOUNT1/adir/bdir || return 2
108 rmdir $MOUNT2/adir/bdir $MOUNT1/adir
109 checkstat $MOUNT1/adir && return 3
110 checkstat $MOUNT2/adir/bdir && return 4
114 run_test 3 "|X| mkdir adir, mkdir adir/bdir "
119 mkdir $MOUNT1/adir && return 1
120 mkdir $MOUNT2/adir/bdir
123 checkstat $MOUNT2/adir || return 2
124 checkstat $MOUNT1/adir/bdir || return 3
126 rmdir $MOUNT2/adir/bdir $MOUNT1/adir
127 checkstat $MOUNT1/adir && return 4
128 checkstat $MOUNT2/adir/bdir && return 5
132 run_test 4 "|X| mkdir adir (-EEXIST), mkdir adir/bdir "
136 # multiclient version of replay_single.sh/test_8
138 multiop $MOUNT2/a o_tSc &
140 # give multiop a chance to open
145 wait $pid || return 1
148 [ -e $MOUNT2/a ] && return 2
151 run_test 5 "open, unlink |X| close"
156 multiop $MOUNT2/a o_c &
158 multiop $MOUNT1/a o_c &
160 # give multiop a chance to open
165 wait $pid1 || return 1
169 wait $pid2 || return 1
170 [ -e $MOUNT2/a ] && return 2
173 run_test 6 "open1, open2, unlink |X| close1 [fail mds] close2"
177 drop_reint_reply "mcreate $MOUNT1/$tfile" || return 1
179 checkstat $MOUNT2/$tfile || return 2
180 rm $MOUNT1/$tfile || return 3
184 run_test 8 "replay of resent request"
188 mcreate $MOUNT1/$tfile-1
189 mcreate $MOUNT2/$tfile-2
190 # drop first reint reply
191 sysctl -w lustre.fail_loc=0x80000119
193 sysctl -w lustre.fail_loc=0
195 rm $MOUNT1/$tfile-[1,2] || return 1
199 run_test 9 "resending a replayed create"
202 mcreate $MOUNT1/$tfile-1
204 munlink $MOUNT1/$tfile-1
205 mcreate $MOUNT2/$tfile-2
206 # drop first reint reply
207 sysctl -w lustre.fail_loc=0x80000119
209 sysctl -w lustre.fail_loc=0
211 checkstat $MOUNT1/$tfile-1 && return 1
212 checkstat $MOUNT1/$tfile-2 || return 2
217 run_test 10 "resending a replayed unlink"
221 mcreate $MOUNT1/$tfile-1
222 mcreate $MOUNT2/$tfile-2
223 mcreate $MOUNT1/$tfile-3
224 mcreate $MOUNT2/$tfile-4
225 mcreate $MOUNT1/$tfile-5
226 # drop all reint replies for a while
227 sysctl -w lustre.fail_loc=0x0119
229 # sleep for a while, let both clients reconnect and time out
230 sleep $((TIMEOUT * 2))
231 sysctl -w lustre.fail_loc=0
233 rm $MOUNT1/$tfile-[1-5] || return 1
237 run_test 11 "both clients timeout during replay"
242 multiop $DIR/$tfile mo_c &
247 sysctl -w lustre.fail_loc=0x80000302
249 df $MOUNT || return 1
250 sysctl -w lustre.fail_loc=0
253 $CHECKSTAT -t file $DIR/$tfile || return 2
254 kill -USR1 $MULTIPID || return 3
255 wait $MULTIPID || return 4
260 run_test 12 "open resend timeout"
263 multiop $DIR/$tfile mo_c &
269 kill -USR1 $MULTIPID || return 3
270 wait $MULTIPID || return 4
273 sysctl -w lustre.fail_loc=0x80000115
275 df $MOUNT || return 1
276 sysctl -w lustre.fail_loc=0
279 $CHECKSTAT -t file $DIR/$tfile || return 2
284 run_test 13 "close resend timeout"
288 createmany -o $MOUNT1/$tfile- 25
289 createmany -o $MOUNT2/$tfile-2- 1
290 createmany -o $MOUNT1/$tfile-3- 25
294 # expect failover to fail
295 df $MOUNT && return 1
298 # first 25 files should have been replayed
299 unlinkmany $MOUNT1/$tfile- 25 || return 2
301 zconf_mount `hostname` $MOUNT2
304 run_test 14 "timeouts waiting for lost client during replay"
308 createmany -o $MOUNT1/$tfile- 25
309 createmany -o $MOUNT2/$tfile-2- 1
313 df $MOUNT || return 1
315 unlinkmany $MOUNT1/$tfile- 25 || return 2
317 zconf_mount `hostname` $MOUNT2
320 run_test 15 "timeout waiting for lost client during replay, 1 client completes"
324 createmany -o $MOUNT1/$tfile- 25
325 createmany -o $MOUNT2/$tfile-2- 1
331 df $MOUNT || return 1
333 unlinkmany $MOUNT1/$tfile- 25 || return 2
335 zconf_mount `hostname` $MOUNT2
339 run_test 16 "fail MDS during recovery (3571)"
342 createmany -o $MOUNT1/$tfile- 25
343 createmany -o $MOUNT2/$tfile-2- 1
345 # Make sure the disconnect is lost
352 df $MOUNT || return 1
354 unlinkmany $MOUNT1/$tfile- 25 || return 2
356 zconf_mount `hostname` $MOUNT2
360 run_test 17 "fail OST during recovery (3571)"
362 # cleanup with blocked enqueue fails until timer elapses (MDS busy), wait for it
365 test_18() { # bug 3822 - evicting client with enqueued lock
367 mkdir -p $MOUNT1/$tdir
368 touch $MOUNT1/$tdir/f0
369 #define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b
370 statmany -s $MOUNT1/$tdir/f 1 500 &
373 do_facet mds sysctl -w lustre.fail_loc=0x8000030b # hold enqueue
375 #define OBD_FAIL_LDLM_BL_CALLBACK 0x305
376 do_facet client sysctl -w lustre.fail_loc=0x80000305 # drop cb, evict
378 usleep 500 # wait to ensure first client is one that will be evicted
379 openfile -f O_RDONLY $MOUNT2/$tdir/f0
381 dmesg | grep "entering recovery in server" && \
382 error "client not evicted" || true
384 run_test 18 "ldlm_handle_enqueue succeeds on evicted export (3822)"
# Final cleanup; skipped when ONLY is "setup".
386 if [ "$ONLY" != "setup" ]; then
387 equals_msg test complete, cleaning up
# Seconds elapsed since NOW (NOW is assigned outside the lines visible here).
388 SLEEP=$((`date +%s` - $NOW))
# NOTE(review): when the elapsed time is below TIMEOUT this sleeps for the
# elapsed time itself, not for the remainder (TIMEOUT - SLEEP) -- confirm
# that this is the intended wait.
389 [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
# Run the command named by CLEANUP with FORCE=--force set for that invocation.
390 FORCE=--force $CLEANUP