# Suite preamble: set the exclusion list and defaults, source the test
# framework and the per-configuration file, skip the whole suite when the
# environment is unsuitable, then set up the filesystem and start from an
# unmounted-client state.
6 ALWAYS_EXCEPT="2 $REPLAY_VBR_EXCEPT"
9 PTLDEBUG=${PTLDEBUG:--1}
# LUSTRE defaults to the parent of the directory that contains this script.
10 LUSTRE=${LUSTRE:-`dirname $0`/..}
12 CLEANUP=${CLEANUP:-""}
13 . $LUSTRE/tests/test-framework.sh
# ":=" both assigns the default to CONFIG (when unset/empty) and sources it.
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Skip (exit 0) when no client list is given, when fewer than two clients are
# available, or when remote_mds_nodsh reports success.
19 [ -n "$CLIENTS" ] || { skip "Need two or more clients" && exit 0; }
20 [ $CLIENTCOUNT -ge 2 ] || \
21 { skip "Need two or more clients, have $CLIENTCOUNT" && exit 0; }
22 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
24 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
# NOTE(review): the next line assigns ALWAYS_EXCEPT to itself, so it has no
# effect as written — confirm whether extra test numbers were meant here.
27 [ ! "$NAME" = "ncli" ] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT"
28 [ "$NAME" = "ncli" ] && MOUNT_2=""
32 check_and_setup_lustre
# Remove leftovers in $DIR whose names start with "d" or "f" plus a digit.
33 rm -rf $DIR/[df][0-9]*
# Start the lctl debug daemon only when DAEMONFILE is set.
35 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
# Unmount $DIR on all clients; the steps below mount the clients they need.
37 [ "$CLIENTS" ] && zconf_umount_clients $CLIENTS $DIR
# Steps for test 1 (see the run_test line that closes this block).
# Both clients create files after replay_barrier; $CLIENT2 is then unmounted
# before the MDS failover. $CLIENT1 must still see a working filesystem and
# all 50 of its files, while the file created by $CLIENT2 must be gone.
# NOTE(review): the enclosing function header/footer are not visible in this
# excerpt.
40 echo "mount client $CLIENT1,$CLIENT2..."
41 zconf_mount_clients $CLIENT1 $DIR
42 zconf_mount_clients $CLIENT2 $DIR
44 do_node $CLIENT2 mkdir -p $DIR/$tdir
45 replay_barrier $SINGLEMDS
# $CLIENT1 creates 25 + 25 files; $CLIENT2 creates a single file in between.
46 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
47 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 1
48 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
49 zconf_umount $CLIENT2 $DIR
51 facet_failover $SINGLEMDS
52 # recovery shouldn't fail due to missing client 2
53 do_node $CLIENT1 df $DIR || return 1
55 # All 50 files should have been replayed
56 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
57 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
59 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
# The file created by the unmounted client must not exist after recovery.
# This check runs on the local node, not through do_node.
60 [ -e $DIR/$tdir/$tfile-2-0 ] && error "$tfile-2-0 exists"
62 zconf_umount_clients $CLIENTS $DIR
65 run_test 1 "lost client doesn't affect another during replay"
# Steps for test 2 (see the run_test line that closes this block).
# $CLIENT2 creates a file after replay_barrier and $CLIENT1 stats it; then
# $CLIENT2 is unmounted and the MDS fails over. $CLIENT1's own 50 files must
# survive, and the file created by $CLIENT2 must no longer be stat-able.
# NOTE(review): ALWAYS_EXCEPT at the top of the file begins with "2", so this
# test is presumably excluded by default — confirm.
68 zconf_mount_clients $CLIENT1 $DIR
69 zconf_mount_clients $CLIENT2 $DIR
71 do_node $CLIENT2 mkdir -p $DIR/$tdir
72 replay_barrier $SINGLEMDS
73 do_node $CLIENT2 mcreate $DIR/$tdir/$tfile
74 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
75 #client1 read data from client2 which will be lost
76 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile
77 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
78 zconf_umount $CLIENT2 $DIR
80 facet_failover $SINGLEMDS
81 # recovery shouldn't fail due to missing client 2
82 do_node $CLIENT1 df $DIR || return 1
84 # All 50 files should have been replayed
85 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
86 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
# A successful stat of $CLIENT2's file here is treated as a failure.
87 do_node $CLIENT1 $CHECKSTAT $DIR/$tdir/$tfile && return 4
89 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
91 zconf_umount_clients $CLIENTS $DIR
94 run_test 2 "lost data due to missed REMOTE client during replay"
# Steps for test 3a (see the run_test line that closes this block).
# After replay_barrier both clients touch the same file and $CLIENT1 removes
# it; $CLIENT2 is unmounted before the failover. The test expects $CLIENT1 to
# recover (df succeeds) and the file to be gone afterwards.
97 zconf_mount_clients $CLIENT1 $DIR
98 zconf_mount_clients $CLIENT2 $DIR
100 #make sure the time will change
# ${!var} is indirect expansion: it yields the value of the variable named
# "${SINGLEMDS}_svc". A bare "return" propagates the failing command's status.
101 local var=${SINGLEMDS}_svc
102 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0" || return
103 do_node $CLIENT1 touch $DIR/$tfile
104 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
106 replay_barrier $SINGLEMDS
108 do_node $CLIENT2 touch $DIR/$tfile
109 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
111 do_node $CLIENT1 touch $DIR/$tfile
113 do_node $CLIENT1 rm $DIR/$tfile
114 zconf_umount $CLIENT2 $DIR
116 facet_failover $SINGLEMDS
117 # recovery shouldn't fail due to missing client 2
118 do_node $CLIENT1 df $DIR || return 1
# The file was removed by $CLIENT1, so a successful stat is a failure.
119 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile && return 2
121 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
123 zconf_umount_clients $CLIENTS $DIR
127 run_test 3a "setattr of time/size doesn't change version"
# Steps for test 3b (see the run_test line that closes this block).
# After replay_barrier $CLIENT2 does "chmod +x" and $CLIENT1 does "chmod -x"
# on the same file; $CLIENT2 is unmounted before the failover. Unlike 3a,
# this test expects $CLIENT1's df to FAIL after the failover, and expects the
# file not to have mode 755.
130 zconf_mount_clients $CLIENT1 $DIR
131 zconf_mount_clients $CLIENT2 $DIR
133 #make sure the time will change
# ${!var} expands the variable whose name is "${SINGLEMDS}_svc".
134 local var=${SINGLEMDS}_svc
135 do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.atime_diff=0" || return
137 do_node $CLIENT1 touch $DIR/$tfile
138 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
140 replay_barrier $SINGLEMDS
142 do_node $CLIENT2 chmod +x $DIR/$tfile
143 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile
145 do_node $CLIENT1 chmod -x $DIR/$tfile
146 zconf_umount $CLIENT2 $DIR
148 facet_failover $SINGLEMDS
149 # recovery should fail due to missing client 2
# Note the inverted checks: success of df or of the 755 check fails the test.
150 do_node $CLIENT1 df $DIR && return 1
152 do_node $CLIENT1 $CHECKSTAT -p 755 $DIR/$tfile && return 2
153 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
155 zconf_umount_clients $CLIENTS $DIR
159 run_test 3b "setattr of permissions changes version"
# Announce the action and set lustre.fail_loc=0x50d on $client via sysctl.
# NOTE(review): the assignment of $client and the closing brace are not
# visible in this excerpt; $client is presumably the first argument — confirm.
161 vbr_deactivate_client() {
163 echo "Deactivating client $client";
164 do_node $client "sysctl -w lustre.fail_loc=0x50d"
# Announce the action and reset lustre.fail_loc to 0x0 on $client via sysctl,
# undoing the value set by vbr_deactivate_client.
# NOTE(review): the assignment of $client and the closing brace are not
# visible in this excerpt; $client is presumably the first argument — confirm.
167 vbr_activate_client() {
169 echo "Activating client $client";
170 do_node $client "sysctl -w lustre.fail_loc=0x0"
# Predicate: succeeds only when "lctl dl" run on $client prints no line
# matching "mdt" and no line matching "ost".
# NOTE(review): the function this belongs to is not visible in this excerpt.
176 [ -z "$(do_node $client lctl dl | grep mdt)" ] && \
177 [ -z "$(do_node $client lctl dl | grep ost)" ]
# Steps for test 4a (see the run_test line that closes this block).
# Both clients create files after replay_barrier; $CLIENT2 is deactivated
# (not unmounted) across the failover and reactivated afterwards. All files
# from both clients are expected to be present at the end.
# Skipped when delayed recovery is unavailable or remote_server $CLIENT2 fails.
181 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
183 remote_server $CLIENT2 || \
184 { skip "Client $CLIENT2 is on the server node" && return 0; }
186 zconf_mount_clients $CLIENT1 $DIR
187 zconf_mount_clients $CLIENT2 $DIR
189 do_node $CLIENT2 mkdir -p $DIR/$tdir
190 replay_barrier $SINGLEMDS
191 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
192 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
193 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
194 vbr_deactivate_client $CLIENT2
196 facet_failover $SINGLEMDS
197 do_node $CLIENT1 df $DIR || return 1
199 # All 50 files should have been replayed
200 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
201 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
203 vbr_activate_client $CLIENT2
204 do_node $CLIENT2 df $DIR || return 4
205 # All 25 files from client2 should have been replayed
206 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
208 zconf_umount_clients $CLIENTS $DIR
211 run_test 4a "fail MDS, delayed recovery"
# Steps for test 4b (see the run_test line that closes this block).
# Like 4a, but $CLIENT1 keeps working (creates a second batch of files) after
# the failover and before $CLIENT2 is reactivated. All three batches must be
# removable at the end.
# NOTE(review): no "mkdir -p $DIR/$tdir" is visible here although $CLIENT2
# creates files under $DIR/$tdir — confirm the directory is guaranteed to
# exist.
214 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
216 remote_server $CLIENT2 || \
217 { skip "Client $CLIENT2 is on the server node" && return 0; }
219 zconf_mount_clients $CLIENT1 $DIR
220 zconf_mount_clients $CLIENT2 $DIR
222 replay_barrier $SINGLEMDS
223 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
224 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
225 vbr_deactivate_client $CLIENT2
227 facet_failover $SINGLEMDS
228 do_node $CLIENT1 df $DIR || return 1
230 # create another set of files
231 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
233 vbr_activate_client $CLIENT2
234 do_node $CLIENT2 df $DIR || return 2
236 # All files from both clients should have been replayed
237 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
238 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
239 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
241 zconf_umount_clients $CLIENTS $DIR
243 run_test 4b "fail MDS, normal operation, delayed open recovery"
# Steps for test 4c (see the run_test line that closes this block).
# Same sequence as 4b, except that files are created with "createmany -m"
# instead of "createmany -o".
# NOTE(review): no "mkdir -p $DIR/$tdir" is visible here although $CLIENT2
# creates files under $DIR/$tdir — confirm the directory is guaranteed to
# exist.
246 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
248 remote_server $CLIENT2 || \
249 { skip "Client $CLIENT2 is on the server node" && return 0; }
251 zconf_mount_clients $CLIENT1 $DIR
252 zconf_mount_clients $CLIENT2 $DIR
254 replay_barrier $SINGLEMDS
255 do_node $CLIENT1 createmany -m $DIR/$tfile- 25
256 do_node $CLIENT2 createmany -m $DIR/$tdir/$tfile-2- 25
257 vbr_deactivate_client $CLIENT2
259 facet_failover $SINGLEMDS
260 do_node $CLIENT1 df $DIR || return 1
262 # create another set of files
263 do_node $CLIENT1 createmany -m $DIR/$tfile-3- 25
265 vbr_activate_client $CLIENT2
266 do_node $CLIENT2 df $DIR || return 2
268 # All files from both clients should have been replayed
269 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
270 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 4
271 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
273 zconf_umount_clients $CLIENTS $DIR
275 run_test 4c "fail MDS, normal operation, delayed recovery"
# Steps for test 5a (see the run_test line that closes this block).
# All files are created directly in $DIR: 25 by $CLIENT1, one by $CLIENT2,
# then one more by $CLIENT1. $CLIENT2 is deactivated across the failover.
# Expected outcome: $CLIENT1's df fails, its first 25 files exist, its last
# file does not, and $CLIENT2's file exists once $CLIENT2 is reactivated.
278 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
280 remote_server $CLIENT2 || \
281 { skip "Client $CLIENT2 is on the server node" && return 0; }
283 zconf_mount_clients $CLIENT1 $DIR
284 zconf_mount_clients $CLIENT2 $DIR
286 replay_barrier $SINGLEMDS
287 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
288 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
289 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 1
290 vbr_deactivate_client $CLIENT2
292 facet_failover $SINGLEMDS
# Inverted check: a successful df on $CLIENT1 fails the test.
293 do_node $CLIENT1 df $DIR && return 1
295 vbr_activate_client $CLIENT2
296 do_node $CLIENT2 df $DIR || return 2
298 # First 25 files should have been replayed
299 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
300 # Third file is failed due to missed client2
301 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-3-0 && error "$tfile-3-0 exists"
302 # file from client2 should exist
303 do_node $CLIENT2 unlinkmany $DIR/$tfile-2- 1 || return 4
305 zconf_umount_clients $CLIENTS $DIR
307 run_test 5a "fail MDS, delayed recovery should fail"
# Steps for test 5b (see the run_test line that closes this block).
# $CLIENT1 creates 25 files and $CLIENT2 one file after replay_barrier;
# $CLIENT2 is deactivated across the failover. $CLIENT1 recovers and creates
# 25 more files. When $CLIENT2 is reactivated its df is expected to FAIL and
# its file must not exist, while both of $CLIENT1's batches must exist.
310 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
312 remote_server $CLIENT2 || \
313 { skip "Client $CLIENT2 is on the server node" && return 0; }
315 zconf_mount_clients $CLIENT1 $DIR
316 zconf_mount_clients $CLIENT2 $DIR
318 replay_barrier $SINGLEMDS
319 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
320 do_node $CLIENT2 createmany -o $DIR/$tfile-2- 1
321 vbr_deactivate_client $CLIENT2
323 facet_failover $SINGLEMDS
324 do_node $CLIENT1 df $DIR || return 1
# $CLIENT2's file must not be visible to $CLIENT1 after the failover.
325 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
327 # create another set of files
328 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
330 vbr_activate_client $CLIENT2
# Inverted check: a successful df on $CLIENT2 fails the test.
331 do_node $CLIENT2 df $DIR && return 4
332 # file from client2 should fail
333 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
335 # All 50 files from client 1 should have been replayed
336 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
337 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
339 zconf_umount_clients $CLIENTS $DIR
341 run_test 5b "fail MDS, normal operation, delayed recovery should fail"
# Steps for test 6a (see the run_test line that closes this block).
# Both clients create files after replay_barrier and $CLIENT2 is deactivated
# across the first failover. $CLIENT2 is then given fail_val=5 and
# fail_loc=0x2000050e before running df, after which the MDS fails over a
# second time. All files from both clients must be removable at the end.
344 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
346 remote_server $CLIENT2 || \
347 { skip "Client $CLIENT2 is on the server node" && return 0; }
349 zconf_mount_clients $CLIENT1 $DIR
350 zconf_mount_clients $CLIENT2 $DIR
352 do_node $CLIENT2 mkdir -p $DIR/$tdir
353 replay_barrier $SINGLEMDS
354 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
355 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
356 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
357 vbr_deactivate_client $CLIENT2
359 facet_failover $SINGLEMDS
360 # replay only 5 requests
361 do_node $CLIENT2 "sysctl -w lustre.fail_val=5"
362 #define OBD_FAIL_PTLRPC_REPLAY 0x50e
363 do_node $CLIENT2 "sysctl -w lustre.fail_loc=0x2000050e"
# The exit status of this df is deliberately not checked.
364 do_node $CLIENT2 df $DIR
365 # vbr_activate_client $CLIENT2
366 # need way to know that client stops replays
369 facet_failover $SINGLEMDS
370 do_node $CLIENT1 df $DIR || return 1
372 # All files should have been replayed
373 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
374 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
375 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
377 zconf_umount_clients $CLIENTS $DIR
380 run_test 6a "fail MDS, delayed recovery, fail MDS"
# Steps for test 7a (see the run_test line that closes this block).
# Both clients create files after replay_barrier; $CLIENT2 is deactivated
# across the first failover, reactivated, and must pass df. The MDS then
# fails over a second time, after which all files from both clients must be
# removable.
# NOTE(review): the description string below is identical to test 6a's.
383 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
385 remote_server $CLIENT2 || \
386 { skip "Client $CLIENT2 is on the server node" && return 0; }
388 zconf_mount_clients $CLIENT1 $DIR
389 zconf_mount_clients $CLIENT2 $DIR
391 do_node $CLIENT2 mkdir -p $DIR/$tdir
392 replay_barrier $SINGLEMDS
393 do_node $CLIENT1 createmany -o $DIR/$tfile- 25
394 do_node $CLIENT2 createmany -o $DIR/$tdir/$tfile-2- 25
395 do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
396 vbr_deactivate_client $CLIENT2
398 facet_failover $SINGLEMDS
399 vbr_activate_client $CLIENT2
400 do_node $CLIENT2 df $DIR || return 4
# Second failover, with both clients active this time.
402 facet_failover $SINGLEMDS
403 do_node $CLIENT1 df $DIR || return 1
405 # All files should have been replayed
406 do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
407 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
408 do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
410 zconf_umount_clients $CLIENTS $DIR
413 run_test 7a "fail MDS, delayed recovery, fail MDS"
# Body of the helper that starts a paused multiop on a remote client (called
# as "rmultiop_start <client> <file>" by the 8x tests).
# It launches runmultiop_bg_pause on $client in the background, reads the
# multiop pid back from a pid file on that client, and exports two variables
# named after the client: <client>_multiop_pid and <client>_do_node_pid.
# NOTE(review): the function header and the assignments of $client, $file and
# $pid are not visible in this excerpt; $pid is presumably the pid of the
# backgrounded do_node — confirm.
# NOTE(review): the comment below names /tmp/multiop_bg.pid, while the code
# uses $TMP/multiop_bg.pid.$$ — confirm the comment is merely outdated.
419 # We need to run do_node in bg, because pdsh does not exit
420 # if child process of run script exists.
421 # I.e. pdsh does not exit when runmultiop_bg_pause exited,
422 # because of multiop_bg_pause -> $MULTIOP_PROG &
423 # By the same reason we need sleep a bit after do_nodes starts
424 # to let runmultiop_bg_pause start multiop and
425 # update /tmp/multiop_bg.pid ;
426 # The rm /tmp/multiop_bg.pid guarantees here that
427 # we have the updated by runmultiop_bg_pause
428 # /tmp/multiop_bg.pid file
# $$ (this script's pid) keeps the pid-file name specific to this run.
430 local pid_file=$TMP/multiop_bg.pid.$$
431 do_node $client "rm -f $pid_file && MULTIOP_PID_FILE=$pid_file LUSTRE= runmultiop_bg_pause $file O_tSc" &
435 multiop_pid=$(do_node $client cat $pid_file)
436 [ -n "$multiop_pid" ] || error "$client : Can not get multiop_pid from $pid_file "
# Export per-client variables so the matching stop helper can find both pids.
437 eval export $(client_var_name $client)_multiop_pid=$multiop_pid
438 eval export $(client_var_name $client)_do_node_pid=$pid
439 local var=$(client_var_name $client)_multiop_pid
440 echo client $client multiop_bg started multiop_pid=${!var}
# Body of the helper that stops the multiop started on a remote client
# (called as "rmultiop_stop <client>" by the 8x tests).
# multiop_pid and do_node_pid hold variable NAMES; ${!name} reads the pids
# stored under the per-client names <client>_multiop_pid/<client>_do_node_pid.
# NOTE(review): the function header and the assignment of $client are not
# visible in this excerpt.
446 local multiop_pid=$(client_var_name $client)_multiop_pid
447 local do_node_pid=$(client_var_name $client)_do_node_pid
449 echo "Stopping multiop_pid=${!multiop_pid} (kill ${!multiop_pid} on $client)"
# SIGUSR1 is sent to the multiop process on the client.
450 do_node $client kill -USR1 ${!multiop_pid}
# Reap the background do_node; its exit status is intentionally ignored.
452 wait ${!do_node_pid} || true
# Steps for test 8a (see the run_test line that closes this block).
# $CLIENT2 opens the file through a paused multiop and unlinks it; after
# replay_barrier the multiop is stopped, $CLIENT2 is deactivated and the MDS
# fails over. Both clients must pass df afterwards and the file must not
# exist.
456 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
458 remote_server $CLIENT2 || \
459 { skip "Client $CLIENT2 is on the server node" && return 0; }
461 zconf_mount_clients $CLIENT1 $DIR
462 zconf_mount_clients $CLIENT2 $DIR
464 rmultiop_start $CLIENT2 $DIR/$tfile || return 1
465 do_node $CLIENT2 rm -f $DIR/$tfile
466 replay_barrier $SINGLEMDS
467 rmultiop_stop $CLIENT2 || return 2
469 vbr_deactivate_client $CLIENT2
470 facet_failover $SINGLEMDS
471 do_node $CLIENT1 df $DIR || return 3
# NOTE(review): the next comment says client1, but the code reactivates
# $CLIENT2 — confirm which naming is intended.
472 #client1 is back and will try to open orphan
473 vbr_activate_client $CLIENT2
474 do_node $CLIENT2 df $DIR || return 4
476 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
477 zconf_umount_clients $CLIENTS $DIR
480 run_test 8a "orphans are kept until delayed recovery"
# Steps for test 8b (see the run_test line that closes this block).
# $CLIENT2 holds the file open through a paused multiop; after replay_barrier
# $CLIENT1 unlinks it. $CLIENT2 is deactivated across the failover and the
# multiop is stopped only after $CLIENT2 has been reactivated. The file must
# not exist at the end.
483 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
485 remote_server $CLIENT2 || \
486 { skip "Client $CLIENT2 is on the server node" && return 0; }
488 zconf_mount_clients $CLIENT1 $DIR
489 zconf_mount_clients $CLIENT2 $DIR
491 rmultiop_start $CLIENT2 $DIR/$tfile || return 1
492 replay_barrier $SINGLEMDS
493 do_node $CLIENT1 rm -f $DIR/$tfile
495 vbr_deactivate_client $CLIENT2
496 facet_failover $SINGLEMDS
497 do_node $CLIENT1 df $DIR || return 2
# NOTE(review): the next comment says client1, but the code reactivates
# $CLIENT2 — confirm which naming is intended.
498 #client1 is back and will try to open orphan
499 vbr_activate_client $CLIENT2
500 do_node $CLIENT2 df $DIR || return 3
# NOTE(review): return code 1 is also used for the rmultiop_start failure
# above, so the two failures cannot be told apart by status.
502 rmultiop_stop $CLIENT2 || return 1
503 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
504 zconf_umount_clients $CLIENTS $DIR
507 run_test 8b "open1 | unlink2 X delayed_replay1, close1"
# Steps for test 8c (see the run_test line that closes this block).
# $CLIENT2 holds the file open through a paused multiop; after replay_barrier
# $CLIENT1 unlinks it and the multiop is stopped BEFORE the failover (in 8b
# it is stopped after). $CLIENT2 is deactivated across the failover. The file
# must not exist at the end.
510 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
512 remote_server $CLIENT2 || \
513 { skip "Client $CLIENT2 is on the server node" && return 0; }
515 zconf_mount_clients $CLIENT1 $DIR
516 zconf_mount_clients $CLIENT2 $DIR
518 rmultiop_start $CLIENT2 $DIR/$tfile || return 1
519 replay_barrier $SINGLEMDS
520 do_node $CLIENT1 rm -f $DIR/$tfile
521 rmultiop_stop $CLIENT2 || return 2
523 vbr_deactivate_client $CLIENT2
524 facet_failover $SINGLEMDS
525 do_node $CLIENT1 df $DIR || return 3
# NOTE(review): the next comment says client1, but the code reactivates
# $CLIENT2 — confirm which naming is intended.
526 #client1 is back and will try to open orphan
527 vbr_activate_client $CLIENT2
528 do_node $CLIENT2 df $DIR || return 4
530 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
531 zconf_umount_clients $CLIENTS $DIR
534 run_test 8c "open1 | unlink2, close1 X delayed_replay1"
# Steps for test 8d (see the run_test line that closes this block).
# Both clients hold the file open through paused multiops; after
# replay_barrier $CLIENT1 unlinks it and both multiops are stopped. $CLIENT2
# is deactivated across the failover. The file must not exist at the end.
537 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
539 remote_server $CLIENT2 || \
540 { skip "Client $CLIENT2 is on the server node" && return 0; }
542 zconf_mount_clients $CLIENT1 $DIR
543 zconf_mount_clients $CLIENT2 $DIR
545 rmultiop_start $CLIENT1 $DIR/$tfile || return 1
546 rmultiop_start $CLIENT2 $DIR/$tfile || return 2
547 replay_barrier $SINGLEMDS
548 do_node $CLIENT1 rm -f $DIR/$tfile
549 rmultiop_stop $CLIENT2 || return 3
550 rmultiop_stop $CLIENT1 || return 4
552 vbr_deactivate_client $CLIENT2
553 facet_failover $SINGLEMDS
554 do_node $CLIENT1 df $DIR || return 6
# NOTE(review): the next comment says client1, but the code reactivates
# $CLIENT2 — confirm which naming is intended.
556 #client1 is back and will try to open orphan
557 vbr_activate_client $CLIENT2
558 do_node $CLIENT2 df $DIR || return 8
560 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
561 zconf_umount_clients $CLIENTS $DIR
564 run_test 8d "open1, open2 | unlink2, close1, close2 X delayed_replay1"
# Steps for test 8e (see the run_test line that closes this block).
# $CLIENT1 creates a file (mcreate) and a directory before replay_barrier.
# After the barrier $CLIENT1 touches a file inside the directory, and
# $CLIENT2 removes the original file and recreates it with touch. $CLIENT1 is
# unmounted before the failover; the recreated file must still exist for
# $CLIENT2 afterwards.
# NOTE(review): this test mounts with zconf_mount while its siblings use
# zconf_mount_clients — confirm the difference is intended.
567 zconf_mount $CLIENT1 $DIR
568 zconf_mount $CLIENT2 $DIR
570 do_node $CLIENT1 mcreate $DIR/$tfile
571 do_node $CLIENT1 mkdir $DIR/$tfile-2
572 replay_barrier $SINGLEMDS
573 # missed replay from client1 will lead to recovery by versions
574 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
575 do_node $CLIENT2 rm $DIR/$tfile || return 1
576 do_node $CLIENT2 touch $DIR/$tfile || return 2
578 zconf_umount $CLIENT1 $DIR
579 facet_failover $SINGLEMDS
580 do_node $CLIENT2 df $DIR || return 6
# rm without -f fails if the file is missing, which triggers the error.
582 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
583 zconf_umount_clients $CLIENTS $DIR
586 run_test 8e "create | unlink, create shouldn't fail"
# Steps for test 8f (see the run_test line that closes this block).
# Variant of 8e with the creation commands swapped: $CLIENT1 creates the file
# with touch before replay_barrier, and $CLIENT2 recreates it with mcreate
# after removing it. $CLIENT1 is unmounted before the failover; the recreated
# file must still exist for $CLIENT2 afterwards.
589 zconf_mount_clients $CLIENT1 $DIR
590 zconf_mount_clients $CLIENT2 $DIR
592 do_node $CLIENT1 touch $DIR/$tfile
593 do_node $CLIENT1 mkdir $DIR/$tfile-2
594 replay_barrier $SINGLEMDS
595 # missed replay from client1 will lead to recovery by versions
596 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
597 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
598 do_node $CLIENT2 mcreate $DIR/$tfile || return 2
600 zconf_umount $CLIENT1 $DIR
601 facet_failover $SINGLEMDS
602 do_node $CLIENT2 df $DIR || return 6
# rm without -f fails if the file is missing, which triggers the error.
604 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
# Only $CLIENT2 is unmounted here; $CLIENT1 was already unmounted above.
605 zconf_umount $CLIENT2 $DIR
608 run_test 8f "create | unlink, create shouldn't fail"
# Steps for test 8g (see the run_test line that closes this block).
# Variant of 8f in which $CLIENT2 replaces the removed file with a DIRECTORY
# of the same name (mkdir) and the final check uses rmdir. $CLIENT1 is
# unmounted before the failover; the directory must still exist for $CLIENT2
# afterwards.
611 zconf_mount_clients $CLIENT1 $DIR
612 zconf_mount_clients $CLIENT2 $DIR
614 do_node $CLIENT1 touch $DIR/$tfile
615 do_node $CLIENT1 mkdir $DIR/$tfile-2
616 replay_barrier $SINGLEMDS
617 # missed replay from client1 will lead to recovery by versions
618 do_node $CLIENT1 touch $DIR/$tfile-2/$tfile
619 do_node $CLIENT2 rm -f $DIR/$tfile || return 1
620 do_node $CLIENT2 mkdir $DIR/$tfile || return 2
622 zconf_umount $CLIENT1 $DIR
623 facet_failover $SINGLEMDS
624 do_node $CLIENT2 df $DIR || return 6
# rmdir fails if the directory is missing, which triggers the error.
626 do_node $CLIENT2 rmdir $DIR/$tfile || error "$tfile doesn't exists"
# Only $CLIENT2 is unmounted here; $CLIENT1 was already unmounted above.
627 zconf_umount $CLIENT2 $DIR
630 run_test 8g "create | unlink, create shouldn't fail"
# Steps for test 10 (see the run_test line that closes this block).
# Starts a 60-second "rundbench 1" load on every client in $CLIENTS via
# $PDSH in the background, recording one pid file per client, then sets a
# replay barrier, deactivates $CLIENT2, reactivates it and checks its df,
# and finally reports the load result for each client.
# Skipped when delayed recovery is unavailable or DBENCH_LIB is unset.
# NOTE(review): several lines are not visible in this excerpt — the
# assignments of $PID and $rc, the loop terminators, and the failover/wait
# steps implied by the log message and comments. Confirm against the full
# file.
633 delayed_recovery_enabled || { skip "No delayed recovery support"; return 0; }
635 [ -z "$DBENCH_LIB" ] && skip "DBENCH_LIB is not set" && return 0
637 zconf_mount_clients $CLIENTS $DIR
639 local duration="-t 60"
640 local cmd="rundbench 1 $duration "
# ${CLIENTS//,/ } turns the comma-separated client list into separate words.
642 for CLIENT in ${CLIENTS//,/ }; do
643 $PDSH $CLIENT "set -x; PATH=:$PATH:$LUSTRE/utils:$LUSTRE/tests/:${DBENCH_LIB} DBENCH_LIB=${DBENCH_LIB} $cmd" &
# The pid file is written relative to the current working directory.
645 echo $PID >pid.$CLIENT
646 echo "Started load PID=`cat pid.$CLIENT`"
649 replay_barrier $SINGLEMDS
650 sleep 3 # give clients a time to do operations
652 vbr_deactivate_client $CLIENT2
654 log "$TESTNAME fail $SINGLEMDS 1"
657 # wait for client to reconnect to MDS
660 vbr_activate_client $CLIENT2
661 do_node $CLIENT2 df $DIR || return 4
663 for CLIENT in ${CLIENTS//,/ }; do
664 PID=`cat pid.$CLIENT`
667 echo "load on ${CLIENT} returned $rc"
670 zconf_umount_clients $CLIENTS $DIR
672 run_test 10 "mds version recovery; $CLIENTCOUNT clients"
# Suite epilogue: announce completion, clean up the filesystem, and derive
# the script's exit status from the test suite log.
674 equals_msg `basename $0`: test complete, cleaning up
675 #SLEEP=$((`date +%s` - $NOW))
676 #[ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
677 check_and_cleanup_lustre
# Print the log if it exists and exit 1 when it contains "FAIL"; in every
# other case the trailing "|| true" leaves a zero status.
678 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true