# Locate the Lustre tree: honour $LUSTRE when set, otherwise use the parent
# of the directory containing this script. Then source the shared test
# framework from that tree.
5 LUSTRE=${LUSTRE:-`dirname $0`/..}
6 . $LUSTRE/tests/test-framework.sh
# Source the configuration file. ":=" also assigns the default path to
# $CONFIG when it is unset or empty, so later code sees the resolved value.
10 . ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
# Names for the setup and cleanup steps, overridable from the environment.
# Presumably these are invoked as commands elsewhere in the script - confirm.
12 SETUP=${SETUP:-"setup"}
13 CLEANUP=${CLEANUP:-"cleanup"}
# Declare the test topology through the add_* helpers (not defined here;
# presumably provided by test-framework.sh - confirm).
# MDS "mds" on $MDSDEV with size $MDSSIZE.
17 add_mds mds --dev $MDSDEV --size $MDSSIZE
# A failover MDS is added only when $mdsfailover_HOST is non-empty.
18 if [ ! -z "$mdsfailover_HOST" ]; then
19 add_mdsfailover mds --dev $MDSDEV --size $MDSSIZE
# LOV "lov1" attached to the mds; stripe size and count come from the
# configuration, stripe pattern is fixed at 0.
22 add_lov lov1 mds --stripe_sz $STRIPE_BYTES \
23 --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
# Two OSTs in lov1, both flagged --failover; the second uses $OSTDEV with a
# "-2" suffix as its device.
24 add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE --failover
25 add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE --failover
# Client of mds/lov1 with its path set to $MOUNT.
26 add_client client mds --lov lov1 --path $MOUNT
34 # make sure we are using the primary MDS, so the config log will
35 # be able to clean up properly.
36 activemds=`facet_active mds`
# NOTE(review): $activemds is unquoted. If facet_active prints nothing, "[" is
# left with "!= mds" and reports "unary operator expected" instead of doing a
# clean comparison; quoting the expansion would avoid that.
37 if [ $activemds != "mds" ]; then
# Best-effort unmount of the second mount point; a failure is ignored on purpose.
41 umount $MOUNT2 || true
# Stop the OST, passing $FORCE through, with --dump pointing at a per-host
# log file under $TMP.
46 stop ost ${FORCE} --dump $TMP/replay-dual-`hostname`.log
# When $ONLY is exactly "cleanup", set portals.debug to 0.
49 if [ "$ONLY" == "cleanup" ]; then
50 sysctl -w portals.debug=0
# Start both OSTs and then the MDS, each with --reformat plus the
# per-target extra arguments from the configuration.
57 start ost --reformat $OSTLCONFARGS
58 start ost2 --reformat $OSTLCONFARGS
59 start mds $MDSLCONFARGS --reformat
# Mount each client mount point on this host unless /proc/mounts already
# lists it (the surrounding spaces anchor the match to the mount-point field).
60 grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
61 grep " $MOUNT2 " /proc/mounts || zconf_mount `hostname` $MOUNT2
63 # echo $TIMEOUT > /proc/sys/lustre/timeout
# Start the lctl debug daemon only when $DAEMONFILE is non-empty.
67 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
# "a" must stat through $MOUNT2 and "b" through $MOUNT1; the distinct return
# codes identify which check failed.
75 checkstat $MOUNT2/a || return 1
76 checkstat $MOUNT1/b || return 2
# Remove "a" via $MOUNT2 and "b" via $MOUNT1, then require that neither is
# still visible through the other mount point.
77 rm $MOUNT2/a $MOUNT1/b
78 checkstat $MOUNT1/a && return 3
79 checkstat $MOUNT2/b && return 4
83 run_test 1 "|X| simple create"
# adir must first be visible through $MOUNT2 (else return 1) ...
91 checkstat $MOUNT2/adir || return 1
# ... and must no longer be visible at this later point (else return 2).
93 checkstat $MOUNT2/adir && return 2
97 run_test 2 "|X| mkdir adir"
# Create the subdirectory through the second mount point.
102 mkdir $MOUNT2/adir/bdir
# The parent must stat via $MOUNT2 and the child via $MOUNT1.
105 checkstat $MOUNT2/adir || return 1
106 checkstat $MOUNT1/adir/bdir || return 2
# Remove child then parent (argument order matters: rmdir needs the parent
# to be empty), each through a different mount point.
107 rmdir $MOUNT2/adir/bdir $MOUNT1/adir
# Neither directory may remain visible afterwards.
108 checkstat $MOUNT1/adir && return 3
109 checkstat $MOUNT2/adir/bdir && return 4
113 run_test 3 "|X| mkdir adir, mkdir adir/bdir "
# This mkdir is expected to fail (the test title names -EEXIST); if it
# succeeds the test returns 1.
118 mkdir $MOUNT1/adir && return 1
119 mkdir $MOUNT2/adir/bdir
# The parent must stat via $MOUNT2 and the child via $MOUNT1.
122 checkstat $MOUNT2/adir || return 2
123 checkstat $MOUNT1/adir/bdir || return 3
# Remove child then parent, then require that both are gone.
125 rmdir $MOUNT2/adir/bdir $MOUNT1/adir
126 checkstat $MOUNT1/adir && return 4
127 checkstat $MOUNT2/adir/bdir && return 5
131 run_test 4 "|X| mkdir adir (-EEXIST), mkdir adir/bdir "
135 # multiclient version of replay_single.sh/test_8
# Run multiop on $MOUNT2/a in the background with the command string "o_tSc".
137 multiop $MOUNT2/a o_tSc &
139 # give multiop a chance to open
# Return 1 if the process in $pid exits with a non-zero status.
144 wait $pid || return 1
# The file must not exist any more at this point, otherwise return 2.
147 [ -e $MOUNT2/a ] && return 2
150 run_test 5 "open, unlink |X| close"
# Start one background multiop per mount point on the same file "a".
155 multiop $MOUNT2/a o_c &
157 multiop $MOUNT1/a o_c &
159 # give multiop a chance to open
# Both background processes must exit successfully. Note that both waits
# share return code 1, so a failure does not say which process failed.
164 wait $pid1 || return 1
168 wait $pid2 || return 1
# The file must not exist any more at this point, otherwise return 2.
169 [ -e $MOUNT2/a ] && return 2
172 run_test 6 "open1, open2, unlink |X| close1 [fail mds] close2"
# Run "mcreate" on $MOUNT1 through the drop_reint_reply helper (not defined
# here; judging by its name and the test title it presumably drops the reply
# so the request is resent - confirm).
176 drop_reint_reply "mcreate $MOUNT1/$tfile" || return 1
# The file must be visible via $MOUNT2 and removable via $MOUNT1.
178 checkstat $MOUNT2/$tfile || return 2
179 rm $MOUNT1/$tfile || return 3
183 run_test 8 "replay of resent request"
# Create one file through each mount point.
187 mcreate $MOUNT1/$tfile-1
188 mcreate $MOUNT2/$tfile-2
189 # drop first reint reply
190 sysctl -w lustre.fail_loc=0x80000119
# Clear the fault injection again.
192 sysctl -w lustre.fail_loc=0
# Both files must be removable through $MOUNT1.
# NOTE(review): "[1,2]" is a bracket expression matching "1", "2" or a
# literal ","; "[12]" would state the intent exactly.
194 rm $MOUNT1/$tfile-[1,2] || return 1
198 run_test 9 "resending a replayed create"
# Create and then unlink file 1 via $MOUNT1; create file 2 via $MOUNT2.
201 mcreate $MOUNT1/$tfile-1
203 munlink $MOUNT1/$tfile-1
204 mcreate $MOUNT2/$tfile-2
205 # drop first reint reply
206 sysctl -w lustre.fail_loc=0x80000119
# Clear the fault injection again.
208 sysctl -w lustre.fail_loc=0
# The unlinked file must stay gone (else return 1) while the file created
# through $MOUNT2 must be visible through $MOUNT1 (else return 2).
210 checkstat $MOUNT1/$tfile-1 && return 1
211 checkstat $MOUNT1/$tfile-2 || return 2
216 run_test 10 "resending a replayed unlink"
# Create five files, alternating between the two mount points.
220 mcreate $MOUNT1/$tfile-1
221 mcreate $MOUNT2/$tfile-2
222 mcreate $MOUNT1/$tfile-3
223 mcreate $MOUNT2/$tfile-4
224 mcreate $MOUNT1/$tfile-5
225 # drop all reint replies for a while
# NOTE(review): this value lacks the 0x80000000 bit used with 0x119 in the
# other tests; presumably that bit makes the injection one-shot, which would
# match the "first reply" vs "all replies" wording of the comments - confirm.
226 sysctl -w lustre.fail_loc=0x0119
228 # sleep for a while so that both clients reconnect and time out
229 sleep $((TIMEOUT * 2))
# Clear the fault injection again.
230 sysctl -w lustre.fail_loc=0
# All five files must be removable through $MOUNT1.
232 rm $MOUNT1/$tfile-[1-5] || return 1
236 run_test 11 "both clients timeout during replay"
# Run multiop on the test file in the background with command string "mo_c".
241 multiop $DIR/$tfile mo_c &
# Arm fault injection 0x80000302 around the df call, then clear it.
246 sysctl -w lustre.fail_loc=0x80000302
248 df $MOUNT || return 1
249 sysctl -w lustre.fail_loc=0
# The test file must exist and be a regular file.
252 $CHECKSTAT -t file $DIR/$tfile || return 2
# Send USR1 to the background multiop and require a clean exit.
253 kill -USR1 $MULTIPID || return 3
254 wait $MULTIPID || return 4
259 run_test 12 "open resend timeout"
# Run multiop on the test file in the background with command string "mo_c".
262 multiop $DIR/$tfile mo_c &
# Send USR1 to the background multiop and require a clean exit. Return codes
# 3 and 4 are used here although these checks run before codes 1 and 2 below.
268 kill -USR1 $MULTIPID || return 3
269 wait $MULTIPID || return 4
# Arm fault injection 0x80000115 around the df call, then clear it.
272 sysctl -w lustre.fail_loc=0x80000115
274 df $MOUNT || return 1
275 sysctl -w lustre.fail_loc=0
# The test file must exist and be a regular file.
278 $CHECKSTAT -t file $DIR/$tfile || return 2
283 run_test 13 "close resend timeout"
# Create 25 files via $MOUNT1, one via $MOUNT2, then 25 more via $MOUNT1
# under a different name prefix.
287 createmany -o $MOUNT1/$tfile- 25
288 createmany -o $MOUNT2/$tfile-2- 1
289 createmany -o $MOUNT1/$tfile-3- 25
293 # expect failover to fail
# Inverted check: a successful df makes the test return 1.
294 df $MOUNT && return 1
297 # first 25 files should have been replayed
298 unlinkmany $MOUNT1/$tfile- 25 || return 2
# Mount $MOUNT2 on this host again.
300 zconf_mount `hostname` $MOUNT2
303 run_test 14 "timeouts waiting for lost client during replay"
# Create 25 files via $MOUNT1 and one via $MOUNT2.
307 createmany -o $MOUNT1/$tfile- 25
308 createmany -o $MOUNT2/$tfile-2- 1
# df on $MOUNT must succeed here.
312 df $MOUNT || return 1
# The 25 files created via $MOUNT1 must be removable.
314 unlinkmany $MOUNT1/$tfile- 25 || return 2
# Mount $MOUNT2 on this host again.
316 zconf_mount `hostname` $MOUNT2
319 run_test 15 "timeout waiting for lost client during replay, 1 client completes"
# Create 25 files via $MOUNT1 and one via $MOUNT2.
323 createmany -o $MOUNT1/$tfile- 25
324 createmany -o $MOUNT2/$tfile-2- 1
# df on $MOUNT must succeed here.
330 df $MOUNT || return 1
# The 25 files created via $MOUNT1 must be removable.
332 unlinkmany $MOUNT1/$tfile- 25 || return 2
# Mount $MOUNT2 on this host again.
334 zconf_mount `hostname` $MOUNT2
338 run_test 16 "fail MDS during recovery (3571)"
# Create 25 files via $MOUNT1 and one via $MOUNT2.
341 createmany -o $MOUNT1/$tfile- 25
342 createmany -o $MOUNT2/$tfile-2- 1
344 # Make sure the disconnect is lost
# df on $MOUNT must succeed here.
351 df $MOUNT || return 1
# The 25 files created via $MOUNT1 must be removable.
353 unlinkmany $MOUNT1/$tfile- 25 || return 2
# Mount $MOUNT2 on this host again.
355 zconf_mount `hostname` $MOUNT2
359 run_test 17 "fail OST during recovery (3571)"
361 # cleanup with blocked enqueue fails until timer elapses (MDS busy), wait for it
364 test_18() { # bug 3822 - evicting client with enqueued lock
# Prepare a test directory with one file through the first mount point.
366 mkdir -p $MOUNT1/$tdir
367 touch $MOUNT1/$tdir/f0
368 #define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b
# Run statmany in the background against the files under the test directory.
369 statmany -s $MOUNT1/$tdir/f 1 500 &
# Fault 0x30b (named in the #define comment) is armed on the mds facet with
# the 0x80000000 bit set.
372 do_facet mds sysctl -w lustre.fail_loc=0x8000030b # hold enqueue
374 #define OBD_FAIL_LDLM_BL_CALLBACK 0x305
# Fault 0x305 is armed on the client facet in the same way.
375 do_facet client sysctl -w lustre.fail_loc=0x80000305 # drop cb, evict
377 usleep 500 # wait to ensure first client is one that will be evicted
# Open the same file read-only through the second mount point.
378 openfile -f O_RDONLY $MOUNT2/$tdir/f0
# Report an error when dmesg contains the recovery message. The trailing
# "|| true" makes the chain succeed when grep finds nothing; it also applies
# if "error" itself returns non-zero.
380 dmesg | grep "entering recovery in server" && \
381 error "client not evicted" || true
383 run_test 18 "ldlm_handle_enqueue succeeds on evicted export (3822)"
# Unless only setup was requested, announce completion before cleaning up.
385 if [ "$ONLY" != "setup" ]; then
386 equals_msg test complete, cleaning up
# Seconds elapsed since the timestamp stored in $NOW.
387 SLEEP=$((`date +%s` - $NOW))
# NOTE(review): when the elapsed time is below $TIMEOUT this sleeps for the
# elapsed time itself, not for the remainder ($TIMEOUT - elapsed) - confirm
# which one is intended.
388 [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP