# Bootstrap: LUSTRE defaults to the parent of this script's directory unless it
# is already set (non-empty) in the environment; the shared test framework is
# then sourced from that tree.
5 LUSTRE=${LUSTRE:-`dirname $0`/..}
6 . $LUSTRE/tests/test-framework.sh
# Source the configuration file. ${CONFIG:=...} also assigns the lmv.sh default
# to CONFIG itself when CONFIG is unset or empty.
10 . ${CONFIG:=$LUSTRE/tests/cfg/lmv.sh}
# Configuration generation (excerpt: the enclosing definition and the
# else/fi/done lines are not visible here).
# When MDSCOUNT is greater than 1, each name returned by mds_list gets a device
# path under $TMP suffixed with the local hostname and is added with
# --lmv lmv1; add_lov_to_lmv is called for lov1/lmv1 with the stripe settings.
14 if [ "$MDSCOUNT" -gt 1 ]; then
16 for mds in `mds_list`; do
17 MDSDEV=$TMP/${mds}-`hostname`
18 add_mds $mds --dev $MDSDEV --size $MDSSIZE --lmv lmv1
20 add_lov_to_lmv lov1 lmv1 --stripe_sz $STRIPE_BYTES \
21 --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
# NOTE(review): presumably the single-MDS branch (the "else" is not visible) —
# a single mds1 with lov1 added against it directly; confirm.
24 add_mds mds1 --dev $MDSDEV --size $MDSSIZE
25 add_lov lov1 mds1 --stripe_sz $STRIPE_BYTES \
26 --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
# Two OSTs are added to lov1 (the second on ${OSTDEV}-2), followed by one
# client using ${MDS} and lov1 with path $MOUNT.
31 add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE
32 add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE
33 add_client client --mds ${MDS} --lov lov1 --path $MOUNT
# Cleanup (excerpt: the body of the activemds check is not visible here).
41 # make sure we are using the primary MDS, so the config log will
42 # be able to clean up properly.
43 activemds=`facet_active mds1`
44 if [ $activemds != "mds1" ]; then
# Unmounting $MOUNT2 is best-effort: a failure is ignored via "|| true".
48 umount $MOUNT2 || true
# Stop every MDS reported by mds_list, then the "ost" facet; the OST stop
# additionally passes --dump cleanup-dual.log.
51 for mds in `mds_list`; do
52 stop $mds ${FORCE} $MDSLCONFARGS
55 stop ost ${FORCE} --dump cleanup-dual.log
# When ONLY is "cleanup", portals.debug is set to 0 (the remainder of this
# branch is not visible in this excerpt).
58 if [ "$ONLY" == "cleanup" ]; then
59 sysctl -w portals.debug=0
# Setup (excerpt): start the first OST with --reformat, then read the pinger
# state from /proc.
65 start ost --reformat $OSTLCONFARGS
66 PINGER=`cat /proc/fs/lustre/pinger`
# The pinger must read "on"; otherwise an error message is printed (what
# follows the message is not visible in this excerpt).
68 if [ "$PINGER" != "on" ]; then
69 echo "ERROR: Lustre must be built with --enable-pinger for replay-dual"
74 start ost2 --reformat $OSTLCONFARGS
# Start the debug daemon only when DAEMONFILE is non-empty.
75 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
# Start every MDS reported by mds_list with --reformat.
76 for mds in `mds_list`; do
77 start $mds --reformat $MDSLCONFARGS
# Mount each client mount point only if it is not already listed in
# /proc/mounts (grep also prints the matching line when it is).
79 grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
80 grep " $MOUNT2 " /proc/mounts || zconf_mount `hostname` $MOUNT2
# Write the TIMEOUT and UPCALL settings into the Lustre /proc tunables.
82 echo $TIMEOUT > /proc/sys/lustre/timeout
83 echo $UPCALL > /proc/sys/lustre/upcall
# NOTE(review): same debug_daemon start command as earlier in this excerpt;
# confirm whether both invocations are intended.
85 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
# Test 1 body (excerpt; the function header and creation steps are not
# visible). "a" must be visible via $MOUNT2 and "b" via $MOUNT1; after rm,
# "a" must be gone from $MOUNT1 and "b" from $MOUNT2. Each check has its own
# return code (1-4) so the failing step can be identified.
93 checkstat $MOUNT2/a || return 1
94 checkstat $MOUNT1/b || return 2
95 rm $MOUNT2/a $MOUNT1/b
96 checkstat $MOUNT1/a && return 3
97 checkstat $MOUNT2/b && return 4
101 run_test 1 "|X| simple create"
# Test 2 body (excerpt; the steps between the two checks are not visible).
# adir must first be visible via $MOUNT2 (else return 1) and must no longer be
# visible at the second check (else return 2).
109 checkstat $MOUNT2/adir || return 1
111 checkstat $MOUNT2/adir && return 2
115 run_test 2 "|X| mkdir adir"
# Test 3 body (excerpt; the creation of adir is not visible). bdir is created
# under adir via $MOUNT2; adir must be visible via $MOUNT2 and adir/bdir via
# $MOUNT1. After rmdir of both, neither may still be visible (returns 3/4).
120 mkdir $MOUNT2/adir/bdir
123 checkstat $MOUNT2/adir || return 1
124 checkstat $MOUNT1/adir/bdir || return 2
125 rmdir $MOUNT2/adir/bdir $MOUNT1/adir
126 checkstat $MOUNT1/adir && return 3
127 checkstat $MOUNT2/adir/bdir && return 4
131 run_test 3 "|X| mkdir adir, mkdir adir/bdir "
# Test 4 body (excerpt). The mkdir of adir via $MOUNT1 is expected to fail
# (the test title says -EEXIST); if it succeeds the test returns 1. bdir is
# then created via $MOUNT2 and both directories are checked, removed, and
# checked again for absence (returns 2-5).
136 mkdir $MOUNT1/adir && return 1
137 mkdir $MOUNT2/adir/bdir
140 checkstat $MOUNT2/adir || return 2
141 checkstat $MOUNT1/adir/bdir || return 3
143 rmdir $MOUNT2/adir/bdir $MOUNT1/adir
144 checkstat $MOUNT1/adir && return 4
145 checkstat $MOUNT2/adir/bdir && return 5
149 run_test 4 "|X| mkdir adir (-EEXIST), mkdir adir/bdir "
# Test 5 body (excerpt; the pid capture and the steps before the wait are not
# visible). A background multiop is started on $MOUNT2/a; the test fails with
# 1 if that process exits non-zero and with 2 if the file still exists.
153 # multiclient version of replay_single.sh/test_8
155 multiop $MOUNT2/a o_tSc &
157 # give multiop a chance to open
162 wait $pid || return 1
165 [ -e $MOUNT2/a ] && return 2
168 run_test 5 "open, unlink |X| close"
# Test 6 body (excerpt; pid1/pid2 assignment and the steps between the waits
# are not visible). Two background multiop processes are started on the same
# file, one per mount. pid1 is waited on first, then pid2; either failing
# returns 1, and the file still existing afterwards returns 2.
173 multiop $MOUNT2/a o_c &
175 multiop $MOUNT1/a o_c &
177 # give multiop a chance to open
182 wait $pid1 || return 1
186 wait $pid2 || return 1
187 [ -e $MOUNT2/a ] && return 2
190 run_test 6 "open1, open2, unlink |X| close1 [fail mds] close2"
# Test 6b body (excerpt; pid1/pid2 assignment and the steps between the waits
# are not visible). Same shape as test 6, but pid2 is waited on before pid1.
# Either wait failing returns 1; the file still existing returns 2.
194 multiop $MOUNT2/a o_c &
196 multiop $MOUNT1/a o_c &
198 # give multiop a chance to open
203 wait $pid2 || return 1
207 wait $pid1 || return 1
208 [ -e $MOUNT2/a ] && return 2
211 run_test 6b "open1, open2, unlink |X| close2 [fail mds] close1"
# Test 7 body (excerpt; the steps between createmany and df are not visible).
# createmany is run three times: 25 entries via $MOUNT1, 1 via $MOUNT2, and
# another 25 via $MOUNT1 under a different prefix.
215 createmany -o $MOUNT1/$tfile- 25
216 createmany -o $MOUNT2/$tfile-2- 1
217 createmany -o $MOUNT1/$tfile-3- 25
# A successful df here is treated as a test failure (return 1).
221 # expect failover to fail
222 df $MOUNT && return 1
224 # 3313 - current fix for 3313 prevents any reply here
225 # unlinkmany $MOUNT1/$tfile- 25 || return 2
# $MOUNT2 is mounted again on the local host before the test ends.
227 zconf_mount `hostname` $MOUNT2
230 run_test 7 "timeouts waiting for lost client during replay"
# Test 8 body (excerpt). The mcreate via $MOUNT1 is run through
# drop_reint_reply; the file must then be visible via $MOUNT2 and removable
# via $MOUNT1. Each step has its own return code (1-3).
235 drop_reint_reply "mcreate $MOUNT1/$tfile" || return 1
237 checkstat $MOUNT2/$tfile || return 2
238 rm $MOUNT1/$tfile || return 3
242 run_test 8 "replay of resent request"
# Test 9 body (excerpt; the step between setting and clearing fail_loc is not
# visible). One file is created via each mount, lustre.fail_loc is set to
# 0x80000119 and later reset to 0, and both files must then be removable via
# $MOUNT1 (else return 1).
246 mcreate $MOUNT1/$tfile-1
247 mcreate $MOUNT2/$tfile-2
248 # drop first reint reply
249 sysctl -w lustre.fail_loc=0x80000119
251 sysctl -w lustre.fail_loc=0
253 rm $MOUNT1/$tfile-[1,2] || return 1
257 run_test 9 "resending a replayed create"
# Test 10 body (excerpt; the step between setting and clearing fail_loc is not
# visible). $tfile-1 is created and unlinked via $MOUNT1 and $tfile-2 is
# created via $MOUNT2. After the fail_loc window, $tfile-1 must be absent
# (else return 1) and $tfile-2 present (else return 2), both seen via $MOUNT1.
260 mcreate $MOUNT1/$tfile-1
262 munlink $MOUNT1/$tfile-1
263 mcreate $MOUNT2/$tfile-2
264 # drop first reint reply
265 sysctl -w lustre.fail_loc=0x80000119
267 sysctl -w lustre.fail_loc=0
269 checkstat $MOUNT1/$tfile-1 && return 1
270 checkstat $MOUNT1/$tfile-2 || return 2
275 run_test 10 "resending a replayed unlink"
# Test 11 body (excerpt; the step right after setting fail_loc is not
# visible). Five files are created, alternating between $MOUNT1 and $MOUNT2.
# lustre.fail_loc is set to 0x0119, the test sleeps for twice TIMEOUT seconds,
# fail_loc is reset, and all five files must be removable via $MOUNT1.
279 mcreate $MOUNT1/$tfile-1
280 mcreate $MOUNT2/$tfile-2
281 mcreate $MOUNT1/$tfile-3
282 mcreate $MOUNT2/$tfile-4
283 mcreate $MOUNT1/$tfile-5
284 # drop all reint replies for a while
285 sysctl -w lustre.fail_loc=0x0119
287 # sleep for a while to let both clients reconnect and time out
288 sleep $((TIMEOUT * 2))
289 sysctl -w lustre.fail_loc=0
291 rm $MOUNT1/$tfile-[1-5] || return 1
295 run_test 11 "both clients timeout during replay"
# Test 12 body (excerpt; the MULTIPID assignment and the steps around the
# fail_loc window are not visible). A background multiop is started on
# $DIR/$tfile and lustre.fail_loc is set to 0x80000302. df on $MOUNT must
# succeed (else return 1) before fail_loc is reset. The file must then be a
# regular file, and the multiop process must accept SIGUSR1 and exit zero
# (returns 2-4).
300 multiop $DIR/$tfile mo_c &
305 sysctl -w lustre.fail_loc=0x80000302
307 df $MOUNT || return 1
308 sysctl -w lustre.fail_loc=0
311 $CHECKSTAT -t file $DIR/$tfile || return 2
312 kill -USR1 $MULTIPID || return 3
313 wait $MULTIPID || return 4
318 run_test 12 "open resend timeout"
# Test 13 body (excerpt; the MULTIPID assignment and several intermediate
# steps are not visible). A background multiop is started on $DIR/$tfile,
# signalled with SIGUSR1 and waited on (returns 3/4 on failure). Then
# lustre.fail_loc is set to 0x80000115, df on $MOUNT must succeed (else
# return 1), fail_loc is reset, and the file must be a regular file (else
# return 2).
321 multiop $DIR/$tfile mo_c &
327 kill -USR1 $MULTIPID || return 3
328 wait $MULTIPID || return 4
331 sysctl -w lustre.fail_loc=0x80000115
333 df $MOUNT || return 1
334 sysctl -w lustre.fail_loc=0
337 $CHECKSTAT -t file $DIR/$tfile || return 2
342 run_test 13 "close resend timeout"
# Unless ONLY is "setup", announce completion through equals_msg (the rest of
# this branch is not visible in this excerpt).
344 if [ "$ONLY" != "setup" ]; then
345 equals_msg test complete, cleaning up