2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
7 # bug number: LU-2012 10124
# Test-suite preamble: set defaults, load the test framework and the cluster
# configuration, then bring the filesystem up.
# ALWAYS_EXCEPT holds the test ids 14b and 15c plus whatever the caller put in
# $REPLAY_DUAL_EXCEPT.
# NOTE(review): presumably read by the sourced framework to skip those tests --
# the consumer is not visible in this file, confirm.
8 ALWAYS_EXCEPT="14b 15c $REPLAY_DUAL_EXCEPT"
# Each of the following keeps a value supplied by the environment and only
# falls back to the default when the variable is unset or empty.
11 PTLDEBUG=${PTLDEBUG:--1}
# Default LUSTRE to the parent of the directory that contains this script.
12 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
14 CLEANUP=${CLEANUP:-""}
15 MOUNT_2=${MOUNT_2:-"yes"}
16 export MULTIOP=${MULTIOP:-multiop}
# Source the shared test framework; the helper functions used throughout this
# script are not defined in this file.
17 . $LUSTRE/tests/test-framework.sh
# Source the configuration file; ":=" also assigns the default path to CONFIG
# when CONFIG is unset or empty.
20 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# When remote_mds_nodsh succeeds, record a skip and leave the script with
# status 0.
23 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
# With SLOW=no, name test 21b in EXCEPT_SLOW.
25 [ "$SLOW" = "no" ] && EXCEPT_SLOW="21b"
29 check_and_setup_lustre
# Capture the output of mounted_lustre_filesystems for the $MOUNT2 check below.
30 MOUNTED=$(mounted_lustre_filesystems)
31 if ! $(echo $MOUNTED' ' | grep -w -q $MOUNT2' '); then
32 zconf_mount $HOSTNAME $MOUNT2
37 rm -rf $DIR/[df][0-9]*
39 [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
41 # if there is no CLIENT1 defined, some tests can be run on localhost
42 CLIENT1=${CLIENT1:-$HOSTNAME}
43 # if CLIENT2 doesn't exist then use CLIENT1 instead
44 # All tests should use CLIENT2 with MOUNT2 only therefore it will work if
46 # The exception is tests which need two separate nodes
47 CLIENT2=${CLIENT2:-$CLIENT1}
49 # LU-482 Avert LVM and VM inability to flush caches in pre .33 kernels
50 if [ $LINUX_VERSION_CODE -lt $(version_code 2.6.33) ]; then
52 do_facet $SINGLEMDS "sync; sleep 10; sync; sleep 10; sync"
55 LU482_FAILED=$(mktemp -u $TMP/$TESTSUITE.lu482.XXXXXX)
57 echo "Check file is LU482_FAILED=$LU482_FAILED"
58 touch $MOUNT2/$tfile-A # force sync FLD/SEQ update before barrier
59 replay_barrier $SINGLEMDS
60 #define OBD_FAIL_PTLRPC_FINISH_REPLAY | OBD_FAIL_ONCE
62 createmany -o $MOUNT1/$tfile- 50
63 $LCTL set_param fail_loc=0x80000514
64 facet_failover $SINGLEMDS
65 [ -f "$LU482_FAILED" ] && skip "LU-482 failure" && return 0
# Mount $MOUNT2 on the local host; report an error if the mount fails.
69 zconf_mount `hostname` $MOUNT2 || error "mount2 failed"
70 unlinkmany $MOUNT1/$tfile- 50 || return 2
71 rm $MOUNT2/$tfile || return 3
72 rm $MOUNT2/$tfile-A || return 4
74 run_test 0a "expired recovery with lost client"
76 if [ -f "$LU482_FAILED" ]; then
77 log "Found check file $LU482_FAILED, aborting test script"
78 rm -vf "$LU482_FAILED"
80 do_nodes $CLIENTS umount -f $MOUNT2 || true
81 do_nodes $CLIENTS umount -f $MOUNT || true
82 # copied from stopall, but avoid the MDS recovery
83 for num in `seq $OSTCOUNT`; do
85 rm -f $TMP/ost${num}active
87 if ! combined_mgs_mds ; then
95 replay_barrier $SINGLEMDS
97 touch $MOUNT1/$tfile-2
99 facet_failover $SINGLEMDS
# Mount both client mount points on the local host; report an error as soon
# as either mount fails.
101 zconf_mount `hostname` $MOUNT1 || error "mount1 failed"
102 zconf_mount `hostname` $MOUNT2 || error "mount2 failed"
103 # it is uncertain if file-2 exists or not, remove it if it does
104 checkstat $MOUNT1/$tfile-2 && rm $MOUNT1/$tfile-2
105 checkstat $MOUNT2/$tfile && return 2
108 run_test 0b "lost client during waiting for next transno"
112 replay_barrier $SINGLEMDS
116 checkstat $MOUNT2/a || return 1
117 checkstat $MOUNT1/b || return 2
118 rm $MOUNT2/a $MOUNT1/b
119 checkstat $MOUNT1/a && return 3
120 checkstat $MOUNT2/b && return 4
124 run_test 1 "|X| simple create"
128 replay_barrier $SINGLEMDS
132 checkstat $MOUNT2/adir || return 1
134 checkstat $MOUNT2/adir && return 2
137 run_test 2 "|X| mkdir adir"
140 replay_barrier $SINGLEMDS
142 mkdir $MOUNT2/adir/bdir
145 checkstat $MOUNT2/adir || return 1
146 checkstat $MOUNT1/adir/bdir || return 2
147 rmdir $MOUNT2/adir/bdir $MOUNT1/adir
148 checkstat $MOUNT1/adir && return 3
149 checkstat $MOUNT2/adir/bdir && return 4
152 run_test 3 "|X| mkdir adir, mkdir adir/bdir "
156 replay_barrier $SINGLEMDS
157 mkdir $MOUNT1/adir && return 1
158 mkdir $MOUNT2/adir/bdir
161 checkstat $MOUNT2/adir || return 2
162 checkstat $MOUNT1/adir/bdir || return 3
164 rmdir $MOUNT2/adir/bdir $MOUNT1/adir
165 checkstat $MOUNT1/adir && return 4
166 checkstat $MOUNT2/adir/bdir && return 5
169 run_test 4 "|X| mkdir adir (-EEXIST), mkdir adir/bdir "
173 # multiclient version of replay_single.sh/test_8
175 multiop_bg_pause $MOUNT2/a o_tSc || return 1
178 replay_barrier $SINGLEMDS
180 wait $pid || return 1
183 [ -e $MOUNT2/a ] && return 2
186 run_test 5 "open, unlink |X| close"
191 multiop_bg_pause $MOUNT2/a o_c || return 1
193 multiop_bg_pause $MOUNT1/a o_c || return 1
196 replay_barrier $SINGLEMDS
198 wait $pid1 || return 1
202 wait $pid2 || return 1
203 [ -e $MOUNT2/a ] && return 2
206 run_test 6 "open1, open2, unlink |X| close1 [fail $SINGLEMDS] close2"
209 replay_barrier $SINGLEMDS
210 drop_reint_reply "mcreate $MOUNT1/$tfile" || return 1
212 checkstat $MOUNT2/$tfile || return 2
213 rm $MOUNT1/$tfile || return 3
217 run_test 8 "replay of resent request"
220 replay_barrier $SINGLEMDS
221 mcreate $MOUNT1/$tfile-1
222 mcreate $MOUNT2/$tfile-2
223 # drop first reint reply
224 do_facet $SINGLEMDS lctl set_param fail_loc=0x80000119
226 do_facet $SINGLEMDS lctl set_param fail_loc=0
228 rm $MOUNT1/$tfile-[1,2] || return 1
232 run_test 9 "resending a replayed create"
235 mcreate $MOUNT1/$tfile-1
236 replay_barrier $SINGLEMDS
237 munlink $MOUNT1/$tfile-1
238 mcreate $MOUNT2/$tfile-2
239 # drop first reint reply
240 do_facet $SINGLEMDS lctl set_param fail_loc=0x80000119
242 do_facet $SINGLEMDS lctl set_param fail_loc=0
244 checkstat $MOUNT1/$tfile-1 && return 1
245 checkstat $MOUNT1/$tfile-2 || return 2
250 run_test 10 "resending a replayed unlink"
253 replay_barrier $SINGLEMDS
254 mcreate $MOUNT1/$tfile-1
255 mcreate $MOUNT2/$tfile-2
256 mcreate $MOUNT1/$tfile-3
257 mcreate $MOUNT2/$tfile-4
258 mcreate $MOUNT1/$tfile-5
259 # drop all reint replies for a while
260 do_facet $SINGLEMDS lctl set_param fail_loc=0x0119
261 # note that with this fail_loc set, facet_failover df will fail
262 facet_failover $SINGLEMDS
263 # sleep for a while, let both clients reconnect and time out
264 sleep $((TIMEOUT * 2))
265 do_facet $SINGLEMDS lctl set_param fail_loc=0
267 rm $MOUNT1/$tfile-[1-5] || return 1
271 run_test 11 "both clients timeout during replay"
274 replay_barrier $SINGLEMDS
276 multiop_bg_pause $DIR/$tfile mo_c || return 1
279 #define OBD_FAIL_LDLM_ENQUEUE_NET 0x302
280 do_facet $SINGLEMDS lctl set_param fail_loc=0x80000302
281 facet_failover $SINGLEMDS
282 do_facet $SINGLEMDS lctl set_param fail_loc=0
283 clients_up || return 1
286 kill -USR1 $MULTIPID || return 3
287 wait $MULTIPID || return 4
288 $CHECKSTAT -t file $DIR/$tfile || return 2
293 run_test 12 "open resend timeout"
296 multiop_bg_pause $DIR/$tfile mo_c || return 1
299 replay_barrier $SINGLEMDS
301 kill -USR1 $MULTIPID || return 3
302 wait $MULTIPID || return 4
305 do_facet $SINGLEMDS lctl set_param fail_loc=0x80000115
306 facet_failover $SINGLEMDS
307 do_facet $SINGLEMDS lctl set_param fail_loc=0
308 clients_up || return 1
311 $CHECKSTAT -t file $DIR/$tfile || return 2
316 run_test 13 "close resend timeout"
318 # test 14a removed after 18143 because it shouldn't fail anymore and does the same
323 wait_delete_completed
325 local BEFOREUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }')
327 mkdir -p $MOUNT1/$tdir
328 $SETSTRIPE -i 0 $MOUNT1/$tdir
329 replay_barrier $SINGLEMDS
330 createmany -o $MOUNT1/$tdir/$tfile- 5
332 $SETSTRIPE -i 0 $MOUNT2/$tfile-2
333 dd if=/dev/zero of=$MOUNT2/$tfile-2 bs=1M count=5
334 createmany -o $MOUNT1/$tdir/$tfile-3- 5
338 wait_recovery_complete $SINGLEMDS || error "MDS recovery not done"
340 # first set of files should have been replayed
341 unlinkmany $MOUNT1/$tdir/$tfile- 5 || error "first unlinks failed"
342 unlinkmany $MOUNT1/$tdir/$tfile-3- 5 || error "second unlinks failed"
344 zconf_mount $HOSTNAME $MOUNT2 || error "mount $MOUNT2 failed"
345 [ -f $MOUNT2/$tfile-2 ] && error "$MOUNT2/$tfile-2 exists!"
347 wait_mds_ost_sync || error "wait_mds_ost_sync failed"
348 wait_delete_completed || error "wait_delete_complete failed"
350 local AFTERUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }')
351 log "before $BEFOREUSED, after $AFTERUSED"
352 # leave some margin for some files/dirs to be modified (OI, llog, etc)
353 [ $AFTERUSED -gt $((BEFOREUSED + 128)) ] &&
354 error "after $AFTERUSED > before $BEFOREUSED" || true
356 run_test 14b "delete ost orphans if gap occured in objids due to VBR"
358 test_15a() { # was test_15
359 replay_barrier $SINGLEMDS
360 createmany -o $MOUNT1/$tfile- 25
361 createmany -o $MOUNT2/$tfile-2- 1
366 unlinkmany $MOUNT1/$tfile- 25 || return 2
367 [ -e $MOUNT1/$tfile-2-0 ] && error "$tfile-2-0 exists"
369 zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
372 run_test 15a "timeout waiting for lost client during replay, 1 client completes"
375 replay_barrier $SINGLEMDS
376 for ((i = 0; i < 2000; i++)); do
377 echo "data" > "$MOUNT2/${tfile}-$i" || error "create ${tfile}-$i failed"
383 zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
386 run_test 15c "remove multiple OST orphans"
389 replay_barrier $SINGLEMDS
390 createmany -o $MOUNT1/$tfile- 25
391 createmany -o $MOUNT2/$tfile-2- 1
394 facet_failover $SINGLEMDS
398 unlinkmany $MOUNT1/$tfile- 25 || return 2
400 zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
404 run_test 16 "fail MDS during recovery (3571)"
407 remote_ost_nodsh && skip "remote OST with nodsh" && return 0
409 createmany -o $MOUNT1/$tfile- 25
410 createmany -o $MOUNT2/$tfile-2- 1
412 # Make sure the disconnect is lost
420 unlinkmany $MOUNT1/$tfile- 25 || return 2
422 zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
426 run_test 17 "fail OST during recovery (3571)"
428 # cleanup with blocked enqueue fails until timer elapses (MDS busy), wait for it
431 test_18() { # bug 3822 - evicting client with enqueued lock
433 mkdir -p $MOUNT1/$tdir
434 touch $MOUNT1/$tdir/f0
435 #define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b
436 statmany -s $MOUNT1/$tdir/f 1 500 &
439 do_facet $SINGLEMDS lctl set_param fail_loc=0x8000030b # hold enqueue
441 #define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305
442 do_facet client lctl set_param fail_loc=0x80000305 # drop cb, evict
444 usleep 500 # wait to ensure first client is one that will be evicted
445 openfile -f O_RDONLY $MOUNT2/$tdir/f0
447 dmesg | grep "entering recovery in server" && \
448 error "client not evicted" || true
449 do_facet client "lctl set_param fail_loc=0"
450 do_facet $SINGLEMDS "lctl set_param fail_loc=0"
452 run_test 18 "ldlm_handle_enqueue succeeds on evicted export (3822)"
454 test_19() { # Bug 10991 - resend of open request does not fail assertion.
455 replay_barrier $SINGLEMDS
456 drop_ldlm_reply "createmany -o $DIR/$tfile 1" || return 1
458 checkstat $DIR2/${tfile}0 || return 2
459 rm $DIR/${tfile}0 || return 3
463 run_test 19 "resend of open request"
467 replay_barrier $SINGLEMDS
473 zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
474 TIER1=$((`date +%s` - BEFORE))
476 replay_barrier $SINGLEMDS
482 zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
483 TIER2=$((`date +%s` - BEFORE))
484 [ $TIER2 -ge $((TIER1 * 2)) ] && \
485 error "recovery time is growing $TIER2 > $TIER1"
488 run_test 20 "recovery time is not increasing"
490 # commit on sharing tests
492 local param_file=$TMP/$tfile-params
494 save_lustre_params $SINGLEMDS "mdt.*.commit_on_sharing" > $param_file
495 do_facet $SINGLEMDS lctl set_param mdt.*.commit_on_sharing=1
496 touch $MOUNT1/$tfile-1
497 mv $MOUNT2/$tfile-1 $MOUNT2/$tfile-2
498 mv $MOUNT1/$tfile-2 $MOUNT1/$tfile-3
499 replay_barrier_nosync $SINGLEMDS
502 facet_failover $SINGLEMDS
504 # all renames are replayed
505 unlink $MOUNT1/$tfile-3 || return 2
507 zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
509 do_facet $SINGLEMDS lctl set_param mdt.*.commit_on_sharing=0
510 rm -rf $MOUNT1/$tfile-*
511 restore_lustre_params < $param_file
515 run_test 21a "commit on sharing"
519 do_node $CLIENT1 rm -f $MOUNT1/$tfile-*
522 do_node $CLIENT1 touch $MOUNT1/$tfile-1
523 do_node $CLIENT2 mv $MOUNT1/$tfile-1 $MOUNT1/$tfile-2
524 do_node $CLIENT1 mv $MOUNT1/$tfile-2 $MOUNT1/$tfile-3
526 replay_barrier_nosync $mds
527 shutdown_client $CLIENT2 $MOUNT1
531 # were renames replayed?
533 echo UNLINK $MOUNT1/$tfile-3
534 do_node $CLIENT1 unlink $MOUNT1/$tfile-3 || \
535 { echo "unlink $tfile-3 fail!" && rc=1; }
538 zconf_mount_clients $CLIENT2 $MOUNT1 || error "mount $CLIENT2 $MOUNT1 fail"
544 [ -z "$CLIENTS" ] && skip "Need two or more clients." && return
545 [ $CLIENTCOUNT -lt 2 ] && \
546 { skip "Need two or more clients, have $CLIENTCOUNT" && return; }
548 if [ "$FAILURE_MODE" = "HARD" ] && mixed_mdt_devs; then
549 skip "Several mdt services on one mds node are used with FAILURE_MODE=$FAILURE_MODE. "
554 zconf_umount_clients $CLIENTS $MOUNT2
555 zconf_mount_clients $CLIENTS $MOUNT1
557 local param_file=$TMP/$tfile-params
559 local num=$(get_mds_dir $MOUNT1)
561 save_lustre_params mds$num "mdt.*.commit_on_sharing" > $param_file
565 do_facet mds$num lctl set_param mdt.*.commit_on_sharing=$COS
567 test_21b_sub mds$num || error "Not all renames are replayed. COS=$COS"
569 # COS disabled (should fail)
571 do_facet mds$num lctl set_param mdt.*.commit_on_sharing=$COS
573 # there is still a window when transactions may be written to disk before
574 # the mds device is set R/O. To avoid such a rare test failure, the check
575 # is repeated several times.
578 test_21b_sub mds$num || break;
579 let n_attempts=n_attempts+1
# Give up once more than 3 attempts have been made: every attempt saw the
# renames replayed even with COS disabled, so the test cannot tell whether
# COS has any effect.
580 [ $n_attempts -gt 3 ] &&
581 error "The test cannot check whether COS works or not: all renames are replayed w/o COS"
583 zconf_mount_clients $CLIENTS $MOUNT2
584 restore_lustre_params < $param_file
588 run_test 21b "commit on sharing, two clients"
591 checkstat $MOUNT1/$remote_dir || return 1
592 checkstat $MOUNT1/$remote_dir/dir || return 2
593 checkstat $MOUNT1/$remote_dir/$tfile-1 || return 3
594 checkstat $MOUNT1/$remote_dir/dir/$tfile-1 || return 4
598 create_remote_dir_files_22() {
599 do_node $CLIENT2 mkdir ${MOUNT2}/$remote_dir/dir || return 1
600 do_node $CLIENT1 createmany -o $MOUNT1/$remote_dir/dir/$tfile- 2 ||
602 do_node $CLIENT2 createmany -o $MOUNT2/$remote_dir/$tfile- 2 ||
608 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
609 ([ $FAILURE_MODE == "HARD" ] &&
610 [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
611 skip "MDTs needs to be on diff hosts for HARD fail mode" &&
615 local remote_dir=${tdir}/remote_dir
617 do_node $CLIENT1 mkdir -p $MOUNT1/${tdir}
619 # OBD_FAIL_MDS_REINT_NET_REP 0x119
620 do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
621 do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir &
625 wait $CLIENT_PID || error "lfs mkdir failed"
627 replay_barrier mds${MDTIDX}
628 create_remote_dir_files_22 || error "Remote creation failed $?"
631 checkstat_22 || error "check stat failed $?"
633 rm -rf $MOUNT1/$tdir || error "rmdir remote_dir failed"
636 run_test 22a "c1 lfs mkdir -i 1 dir1, M0 drop reply & fail, c2 mkdir dir1/dir"
639 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
641 local remote_dir=$tdir/remote_dir
643 # OBD_FAIL_MDS_REINT_NET_REP 0x119
644 do_node $CLIENT1 mkdir -p $MOUNT1/${tdir}
646 do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
647 do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir &
650 fail mds${MDTIDX},mds$((MDTIDX + 1))
651 wait $CLIENT_PID || error "lfs mkdir failed"
653 replay_barrier mds$MDTIDX
654 create_remote_dir_files_22 || error "Remote creation failed $?"
657 checkstat_22 || error "check stat failed $?"
659 rm -rf $MOUNT1/$tdir || error "rmdir remote_dir failed"
662 run_test 22b "c1 lfs mkdir -i 1 d1, M0 drop reply & fail M0/M1, c2 mkdir d1/dir"
665 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
666 ([ $FAILURE_MODE == "HARD" ] &&
667 [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
668 skip "MDTs needs to be on diff hosts for HARD fail mode" &&
671 local remote_dir=${tdir}/remote_dir
673 do_node $CLIENT1 mkdir -p $MOUNT1/${tdir}
675 # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
676 do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x1701
677 do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir &
679 do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
681 fail mds$((MDTIDX+1))
682 wait $CLIENT_PID || error "lfs mkdir failed"
684 replay_barrier mds$MDTIDX
685 create_remote_dir_files_22 || error "Remote creation failed $?"
688 checkstat_22 || error "check stat failed $?"
690 rm -rf $MOUNT1/$tdir || error "rmdir remote_dir failed"
693 run_test 22c "c1 lfs mkdir -i 1 d1, M1 drop update & fail M1, c2 mkdir d1/dir"
696 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
698 local remote_dir=${tdir}/remote_dir
700 do_node $CLIENT1 mkdir -p $MOUNT1/${tdir}
702 # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
703 do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x1701
704 do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir &
706 do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
708 fail mds${MDTIDX},mds$((MDTIDX + 1))
709 wait $CLIENT_PID || error "lfs mkdir failed"
711 replay_barrier mds$MDTIDX
712 create_remote_dir_files_22 || error "Remote creation failed $?"
715 checkstat_22 || error "check stat failed $?"
717 rm -rf $MOUNT1/$tdir || error "rmdir remote_dir failed"
720 run_test 22d "c1 lfs mkdir -i 1 d1, M1 drop update & fail M0/M1,c2 mkdir d1/dir"
723 checkstat $MOUNT1/$remote_dir || return 1
724 checkstat $MOUNT1/$remote_dir/$tfile-1 || return 2
728 create_remote_dir_files_23() {
729 do_node $CLIENT2 mkdir ${MOUNT2}/$remote_dir || return 1
730 do_node $CLIENT2 createmany -o $MOUNT2/$remote_dir/$tfile- 2 || return 2
735 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
736 ([ $FAILURE_MODE == "HARD" ] &&
737 [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
738 skip "MDTs needs to be on diff hosts for HARD fail mode" &&
741 local remote_dir=$tdir/remote_dir
743 do_node $CLIENT1 mkdir -p $MOUNT1/${tdir}
744 do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir ||
745 error "lfs mkdir failed"
746 # OBD_FAIL_MDS_REINT_NET_REP 0x119
747 do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
748 do_node $CLIENT1 rmdir $MOUNT1/$remote_dir &
750 do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
752 fail mds$((MDTIDX + 1))
753 wait $CLIENT_PID || error "rmdir remote dir failed"
755 replay_barrier mds${MDTIDX}
756 create_remote_dir_files_23 || error "Remote creation failed $?"
759 checkstat_23 || error "check stat failed $?"
761 rm -rf $MOUNT1/$tdir || error "rmdir remote_dir failed"
764 run_test 23a "c1 rmdir d1, M1 drop reply and fail, client2 mkdir d1"
767 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
769 local remote_dir=$tdir/remote_dir
771 do_node $CLIENT1 mkdir -p $MOUNT1/${tdir}
772 do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir ||
773 error "lfs mkdir failed"
775 # OBD_FAIL_MDS_REINT_NET_REP 0x119
776 do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
777 do_node $CLIENT1 rmdir $MOUNT1/$remote_dir &
779 do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
781 fail mds${MDTIDX},mds$((MDTIDX + 1))
782 wait $CLIENT_PID || error "rmdir remote dir failed"
784 replay_barrier mds${MDTIDX}
785 create_remote_dir_files_23 || error "Remote creation failed $?"
788 checkstat_23 || error "check stat failed $?"
790 rm -rf $MOUNT1/$tdir || error "rmdir remote_dir failed"
793 run_test 23b "c1 rmdir d1, M1 drop reply and fail M0/M1, c2 mkdir d1"
796 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
798 ([ $FAILURE_MODE == "HARD" ] &&
799 [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
800 skip "MDTs needs to be on diff hosts for HARD fail mode" &&
803 local remote_dir=$tdir/remote_dir
805 do_node $CLIENT1 mkdir -p $MOUNT1/${tdir}
806 do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir ||
807 error "lfs mkdir failed"
809 # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
810 do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
811 do_node $CLIENT1 rmdir $MOUNT1/$remote_dir &
813 do_facet mds${MDTIDX} lctl set_param fail_loc=0
816 wait $CLIENT_PID || error "rmdir remote dir failed"
818 replay_barrier mds${MDTIDX}
819 create_remote_dir_files_23 || error "Remote creation failed $?"
822 checkstat_23 || error "check stat failed $?"
824 rm -rf $MOUNT1/$tdir || return 6
827 run_test 23c "c1 rmdir d1, M0 drop update reply and fail M0, c2 mkdir d1"
830 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
832 local remote_dir=$tdir/remote_dir
834 do_node $CLIENT1 mkdir -p $MOUNT1/${tdir}
835 do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir ||
836 error "lfs mkdir failed"
838 # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
839 do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
840 do_node $CLIENT1 rmdir $MOUNT1/$remote_dir &
842 do_facet mds${MDTIDX} lctl set_param fail_loc=0
844 fail mds${MDTIDX},mds$((MDTIDX + 1))
845 wait $CLIENT_PID || error "rmdir remote dir failed"
847 replay_barrier mds${MDTIDX}
848 create_remote_dir_files_23 || error "Remote creation failed $?"
851 checkstat_23 || error "check stat failed $?"
853 rm -rf $MOUNT1/$tdir || return 6
856 run_test 23d "c1 rmdir d1, M0 drop update reply and fail M0/M1, c2 mkdir d1"
858 # end commit on sharing tests
# Final teardown for the test script.
# SLEEP is the number of seconds elapsed since $NOW; when that is shorter than
# $TIMEOUT, sleep for that same number of seconds before cleaning up.
# NOTE(review): NOW is assigned outside the visible part of this file --
# presumably a `date +%s` timestamp taken earlier, confirm.
861 SLEEP=$((`date +%s` - $NOW))
862 [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
# Unmount $MOUNT2 only when MOUNTED2 is "yes"; the trailing "|| true" keeps a
# false test (or a failed unmount) from leaving a non-zero status.
# NOTE(review): presumably MOUNTED2 is set where this script mounted $MOUNT2
# itself -- that assignment is not visible here, confirm.
863 [ "$MOUNTED2" = yes ] && zconf_umount $HOSTNAME $MOUNT2 || true
864 check_and_cleanup_lustre