require_dsh_mds || exit 0
# Skip these tests
-# bug number: 17466 18857 LU1867
-ALWAYS_EXCEPT="61d 33a 33b 89 $REPLAY_SINGLE_EXCEPT"
+# bug number for skipped tests:
+# LU-472
+ALWAYS_EXCEPT="$REPLAY_SINGLE_EXCEPT 61d"
+# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
-# 63 min 7 min AT AT AT AT"
-[ "$SLOW" = "no" ] && EXCEPT_SLOW="1 2 3 4 6 12 16 44a 44b 65 66 67 68"
+# NOTE(review): "7*" matches every release string that starts with 7, whatever
+# the distribution; the stated intent is el7 only - confirm this is acceptable.
+case "$(lsb_release -sr)" in # only disable tests for el7
+7*) # bug number: LU-6455
+ ALWAYS_EXCEPT="$ALWAYS_EXCEPT 28"
+ ;;
+esac
+
+# Tests excluded when SLOW=no; runtime note: 7.5 (min), presumably for 44b
+[ "$SLOW" = "no" ] && EXCEPT_SLOW="44b"
+
+# bug number for skipped test: LU-1867 LU-3127
+# The command substitution is quoted so that an empty or multi-word result
+# from facet_fstype is compared as one string instead of breaking the test.
+[ "$(facet_fstype $SINGLEMDS)" = "zfs" ] &&
+ ALWAYS_EXCEPT="$ALWAYS_EXCEPT 89 73b"
build_test_filter
mkdir -p $DIR
assert_DIR
-rm -rf $DIR/[df][0-9]*
+rm -rf $DIR/[df][0-9]* $DIR/f.$TESTSUITE.*
# LU-482 Avert LVM and VM inability to flush caches in pre .33 kernels
if [ $LINUX_VERSION_CODE -lt $(version_code 2.6.33) ]; then
fi
test_0a() { # was test_0
- mkdir $DIR/$tfile
- replay_barrier $SINGLEMDS
- fail $SINGLEMDS
- rmdir $DIR/$tfile
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed" # new version uses $tdir instead of $tfile and reports a failed mkdir
+ replay_barrier $SINGLEMDS # nothing happens between the barrier and the failure: the "empty replay" case
+ fail $SINGLEMDS # presumably fails over the MDS and waits for recovery - confirm in the test framework
+ rmdir $DIR/$tdir # the directory was created before the barrier and is removed afterwards
}
run_test 0a "empty replay"
test_0b() {
- remote_ost_nodsh && skip "remote OST with nodsh" && return 0
+ remote_ost_nodsh && skip "remote OST with nodsh" && return 0 # test is skipped (returns 0) when remote_ost_nodsh succeeds
- # this test attempts to trigger a race in the precreation code,
- # and must run before any other objects are created on the filesystem
- fail ost1
- createmany -o $DIR/$tfile 20 || return 1
- unlinkmany $DIR/$tfile 20 || return 2
+ # this test attempts to trigger a race in the precreation code,
+ # and must run before any other objects are created on the filesystem
+ fail ost1
+ createmany -o $DIR/$tfile 20 || error "createmany -o $DIR/$tfile failed" # creating 20 entries after the ost1 failure must succeed
+ unlinkmany $DIR/$tfile 20 || error "unlinkmany $DIR/$tfile failed" # the same 20 entries are removed again
}
run_test 0b "ensure object created after recover exists. (3284)"
mcreate $DIR/$tfile
umount $MOUNT
facet_failover $SINGLEMDS
- zconf_mount `hostname` $MOUNT || error "mount fails"
+ zconf_mount $(hostname) $MOUNT || error "mount fails"
client_up || error "post-failover df failed"
# file shouldn't exist if replay-barrier works as expected
- rm $DIR/$tfile && return 1
+ rm $DIR/$tfile && error "File exists and it shouldn't"
return 0
}
run_test 0c "check replay-barrier"
test_0d() {
- replay_barrier $SINGLEMDS
- umount $MOUNT
- facet_failover $SINGLEMDS
- zconf_mount `hostname` $MOUNT || error "mount fails"
- client_up || error "post-failover df failed"
+ replay_barrier $SINGLEMDS
+ umount $MOUNT # the client is unmounted before the failover, matching "no clients" in the test title
+ facet_failover $SINGLEMDS
+ zconf_mount $(hostname) $MOUNT || error "mount fails" # backticks replaced by $(...); remount on the local host
+ client_up || error "post-failover df failed" # the remounted client must be usable again
}
run_test 0d "expired recovery with no clients"
test_1() {
- replay_barrier $SINGLEMDS
- mcreate $DIR/$tfile
- fail $SINGLEMDS
- $CHECKSTAT -t file $DIR/$tfile || return 1
- rm $DIR/$tfile
+ replay_barrier $SINGLEMDS
+ mcreate $DIR/$tfile # created after the barrier; it must still be a regular file after the failure below
+ fail $SINGLEMDS
+ $CHECKSTAT -t file $DIR/$tfile ||
+ error "$CHECKSTAT $DIR/$tfile attribute check failed" # replaces the old bare "return 1"
+ rm $DIR/$tfile
}
run_test 1 "simple create"
test_2a() {
- replay_barrier $SINGLEMDS
- touch $DIR/$tfile
- fail $SINGLEMDS
- $CHECKSTAT -t file $DIR/$tfile || return 1
- rm $DIR/$tfile
+ replay_barrier $SINGLEMDS
+ touch $DIR/$tfile # here touch creates the file, after the barrier
+ fail $SINGLEMDS
+ $CHECKSTAT -t file $DIR/$tfile ||
+ error "$CHECKSTAT $DIR/$tfile attribute check failed" # the file must exist as a regular file after the failure
+ rm $DIR/$tfile
}
run_test 2a "touch"
test_2b() {
- mcreate $DIR/$tfile
- replay_barrier $SINGLEMDS
- touch $DIR/$tfile
- fail $SINGLEMDS
- $CHECKSTAT -t file $DIR/$tfile || return 1
- rm $DIR/$tfile
+ mcreate $DIR/$tfile || error "mcreate $DIR/$tfile failed" # unlike test_2a, the file already exists before the barrier
+ replay_barrier $SINGLEMDS
+ touch $DIR/$tfile # touch of an existing file is the only operation after the barrier
+ fail $SINGLEMDS
+ $CHECKSTAT -t file $DIR/$tfile ||
+ error "$CHECKSTAT $DIR/$tfile attribute check failed"
+ rm $DIR/$tfile
}
run_test 2b "touch"
test_3a() {
- local file=$DIR/$tfile
- replay_barrier $SINGLEMDS
- mcreate $file
- openfile -f O_DIRECTORY $file
- fail $SINGLEMDS
- $CHECKSTAT -t file $file || return 2
- rm $file
+ local file=$DIR/$tfile
+ replay_barrier $SINGLEMDS
+ mcreate $file
+ openfile -f O_DIRECTORY $file # result is not checked; the test title says this open is expected to fail
+ fail $SINGLEMDS
+ $CHECKSTAT -t file $file ||
+ error "$CHECKSTAT $file attribute check failed" # the file must still be a regular file after the failure
+ rm $file
}
run_test 3a "replay failed open(O_DIRECTORY)"
test_3b() {
- replay_barrier $SINGLEMDS
-#define OBD_FAIL_MDS_OPEN_PACK | OBD_FAIL_ONCE
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000114"
- touch $DIR/$tfile
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
- fail $SINGLEMDS
- $CHECKSTAT -t file $DIR/$tfile && return 2
- return 0
+ replay_barrier $SINGLEMDS
+ #define OBD_FAIL_MDS_OPEN_PACK | OBD_FAIL_ONCE
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000114" # arm the fail_loc named in the comment above on the MDS
+ touch $DIR/$tfile # result is not checked; this create runs while the fail_loc is armed
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0" # disarm before failing the MDS
+ fail $SINGLEMDS
+ $CHECKSTAT -t file $DIR/$tfile &&
+ error "$CHECKSTAT $DIR/$tfile attribute check should fail" # the file must NOT exist after the failure
+ return 0
}
run_test 3b "replay failed open -ENOMEM"
test_3c() {
- replay_barrier $SINGLEMDS
-#define OBD_FAIL_MDS_ALLOC_OBDO | OBD_FAIL_ONCE
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000128"
- touch $DIR/$tfile
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
- fail $SINGLEMDS
-
- $CHECKSTAT -t file $DIR/$tfile && return 2
- return 0
+ replay_barrier $SINGLEMDS
+ #define OBD_FAIL_MDS_ALLOC_OBDO | OBD_FAIL_ONCE
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000128" # same flow as test_3b with a different fail_loc value
+ touch $DIR/$tfile # result is not checked; runs while the fail_loc is armed
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0" # disarm before failing the MDS
+ fail $SINGLEMDS
+
+ $CHECKSTAT -t file $DIR/$tfile &&
+ error "$CHECKSTAT $DIR/$tfile attribute check should fail" # the file must NOT exist after the failure
+ return 0
}
run_test 3c "replay failed open -ENOMEM"
test_4a() { # was test_4
- replay_barrier $SINGLEMDS
- for i in `seq 10`; do
- echo "tag-$i" > $DIR/$tfile-$i
- done
- fail $SINGLEMDS
- for i in `seq 10`; do
- grep -q "tag-$i" $DIR/$tfile-$i || error "$tfile-$i"
- done
+ replay_barrier $SINGLEMDS
+ for i in $(seq 10); do # backticks replaced by $(...); i is not declared local
+ echo "tag-$i" > $DIR/$tfile-$i # each of the 10 files gets its own tag as content
+ done
+ fail $SINGLEMDS
+ for i in $(seq 10); do
+ grep -q "tag-$i" $DIR/$tfile-$i || error "$tfile-$i" # every file must still contain its tag after the failure
+ done
}
run_test 4a "|x| 10 open(O_CREAT)s"
test_4b() {
- replay_barrier $SINGLEMDS
- rm -rf $DIR/$tfile-*
- fail $SINGLEMDS
- $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true
+ for i in $(seq 10); do # new version creates its own 10 files instead of relying on earlier leftovers
+ echo "tag-$i" > $DIR/$tfile-$i
+ done
+ replay_barrier $SINGLEMDS
+ rm -rf $DIR/$tfile-* # the removal is the operation performed after the barrier
+ fail $SINGLEMDS
+ $CHECKSTAT -t file $DIR/$tfile-* &&
+ error "$CHECKSTAT $DIR/$tfile-* attribute check should fail" ||
+ true # a failing $CHECKSTAT is the expected outcome, so the chain ends with status 0
}
run_test 4b "|x| rm 10 files"
# The idea is to get past the first block of precreated files on both
# osts, and then replay.
test_5() {
- replay_barrier $SINGLEMDS
- for i in `seq 220`; do
- echo "tag-$i" > $DIR/$tfile-$i
- done
- fail $SINGLEMDS
- for i in `seq 220`; do
- grep -q "tag-$i" $DIR/$tfile-$i || error "$tfile-$i"
- done
- rm -rf $DIR/$tfile-*
- sleep 3
- # waiting for commitment of removal
+ replay_barrier $SINGLEMDS
+ for i in $(seq 220); do # same pattern as test_4a, with 220 files
+ echo "tag-$i" > $DIR/$tfile-$i
+ done
+ fail $SINGLEMDS
+ for i in $(seq 220); do
+ grep -q "tag-$i" $DIR/$tfile-$i || error "$tfile-$i" # every file must still contain its tag after the failure
+ done
+ rm -rf $DIR/$tfile-*
+ sleep 3 # fixed 3 second pause; the reason is given on the next line
+ # waiting for commitment of removal
}
run_test 5 "|x| 220 open(O_CREAT)"
-
test_6a() { # was test_6
- mkdir -p $DIR/$tdir
- replay_barrier $SINGLEMDS
- mcreate $DIR/$tdir/$tfile
- fail $SINGLEMDS
- $CHECKSTAT -t dir $DIR/$tdir || return 1
- $CHECKSTAT -t file $DIR/$tdir/$tfile || return 2
- sleep 2
- # waiting for log process thread
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed" # "-p" dropped: an already existing directory is now an error
+ replay_barrier $SINGLEMDS
+ mcreate $DIR/$tdir/$tfile # the file inside the directory is created after the barrier
+ fail $SINGLEMDS
+ $CHECKSTAT -t dir $DIR/$tdir ||
+ error "$CHECKSTAT $DIR/$tdir attribute check failed"
+ $CHECKSTAT -t file $DIR/$tdir/$tfile ||
+ error "$CHECKSTAT $DIR/$tdir/$tfile attribute check failed"
+ sleep 2 # fixed 2 second pause; the reason is given on the next line
+ # waiting for log process thread
}
run_test 6a "mkdir + contained create"
test_6b() {
- mkdir -p $DIR/$tdir
- replay_barrier $SINGLEMDS
- rm -rf $DIR/$tdir
- fail $SINGLEMDS
- $CHECKSTAT -t dir $DIR/$tdir && return 1 || true
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed" # "-p" dropped: an already existing directory is now an error
+ replay_barrier $SINGLEMDS
+ rm -rf $DIR/$tdir # the removal is the operation performed after the barrier
+ fail $SINGLEMDS
+ $CHECKSTAT -t dir $DIR/$tdir &&
+ error "$CHECKSTAT $DIR/$tdir attribute check should fail" ||
+ true # a failing $CHECKSTAT is the expected outcome, so the chain ends with status 0
}
run_test 6b "|X| rmdir"
test_7() {
- mkdir -p $DIR/$tdir
- replay_barrier $SINGLEMDS
- mcreate $DIR/$tdir/$tfile
- fail $SINGLEMDS
- $CHECKSTAT -t dir $DIR/$tdir || return 1
- $CHECKSTAT -t file $DIR/$tdir/$tfile || return 2
- rm -fr $DIR/$tdir
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed" # directory is created before the barrier
+ replay_barrier $SINGLEMDS
+ mcreate $DIR/$tdir/$tfile # contained file is created after the barrier
+ fail $SINGLEMDS
+ $CHECKSTAT -t dir $DIR/$tdir ||
+ error "$CHECKSTAT $DIR/$tdir attribute check failed"
+ $CHECKSTAT -t file $DIR/$tdir/$tfile ||
+ error "$CHECKSTAT $DIR/$tdir/$tfile attribute check failed"
+ rm -fr $DIR/$tdir # unlike test_6a, this test removes the directory itself
}
run_test 7 "mkdir |X| contained create"
test_8() {
- # make sure no side-effect from previous test.
- rm -f $DIR/$tfile
- replay_barrier $SINGLEMDS
- multiop_bg_pause $DIR/$tfile mo_c || return 4
- MULTIPID=$!
- fail $SINGLEMDS
- ls $DIR/$tfile
- $CHECKSTAT -t file $DIR/$tfile || return 1
- kill -USR1 $MULTIPID || return 2
- wait $MULTIPID || return 3
- rm $DIR/$tfile
+ replay_barrier $SINGLEMDS # the preliminary "rm -f" of the old version is no longer done
+ multiop_bg_pause $DIR/$tfile mo_c ||
+ error "multiop mknod $DIR/$tfile failed"
+ MULTIPID=$! # PID of the background multiop; not declared local
+ fail $SINGLEMDS
+ ls $DIR/$tfile # output only; the result is not checked
+ $CHECKSTAT -t file $DIR/$tfile ||
+ error "$CHECKSTAT $DIR/$tfile attribute check failed"
+ kill -USR1 $MULTIPID || error "multiop mknod $MULTIPID not running" # USR1 presumably resumes the paused multiop - confirm
+ wait $MULTIPID || error "multiop mknod $MULTIPID failed" # the multiop exit status must be 0
+ rm $DIR/$tfile
}
run_test 8 "creat open |X| close"
test_9() {
- replay_barrier $SINGLEMDS
- mcreate $DIR/$tfile
- local old_inum=`ls -i $DIR/$tfile | awk '{print $1}'`
- fail $SINGLEMDS
- local new_inum=`ls -i $DIR/$tfile | awk '{print $1}'`
-
- echo " old_inum == $old_inum, new_inum == $new_inum"
- if [ $old_inum -eq $new_inum ] ;
- then
- echo " old_inum and new_inum match"
- else
- echo "!!!! old_inum and new_inum NOT match"
- return 1
- fi
- rm $DIR/$tfile
+ # Checks that a file created after the replay barrier keeps the same
+ # inode number (first field of "ls -i") once the MDS has been failed.
+ local old_inum
+ local new_inum
+
+ replay_barrier $SINGLEMDS
+ mcreate $DIR/$tfile
+ # assigned separately from "local" so the declaration does not mask the
+ # status of the command substitution
+ old_inum=$(ls -i $DIR/$tfile | awk '{print $1}')
+ fail $SINGLEMDS
+ new_inum=$(ls -i $DIR/$tfile | awk '{print $1}')
+
+ echo " old_inum == $old_inum, new_inum == $new_inum"
+ # an empty value means ls produced no inode number; report that
+ # explicitly instead of letting the numeric test print a syntax error
+ if [ -z "$old_inum" ] || [ -z "$new_inum" ]; then
+ error "could not read inode number of $DIR/$tfile"
+ fi
+ if [ "$old_inum" -eq "$new_inum" ]; then
+ echo "old_inum and new_inum match"
+ else
+ echo " old_inum and new_inum do not match"
+ error "old index($old_inum) does not match new index($new_inum)"
+ fi
+ rm $DIR/$tfile
}
-run_test 9 "|X| create (same inum/gen)"
+run_test 9 "|X| create (same inum/gen)"
test_10() {
- mcreate $DIR/$tfile
- replay_barrier $SINGLEMDS
- mv $DIR/$tfile $DIR/$tfile-2
- rm -f $DIR/$tfile
- fail $SINGLEMDS
- $CHECKSTAT $DIR/$tfile && return 1
- $CHECKSTAT $DIR/$tfile-2 ||return 2
- rm $DIR/$tfile-2
- return 0
+ mcreate $DIR/$tfile || error "mcreate $DIR/$tfile failed" # the file exists before the barrier
+ replay_barrier $SINGLEMDS
+ mv $DIR/$tfile $DIR/$tfile-2 # rename and unlink are the operations performed after the barrier
+ rm -f $DIR/$tfile
+ fail $SINGLEMDS
+ $CHECKSTAT $DIR/$tfile &&
+ error "$CHECKSTAT $DIR/$tfile attribute check should fail" # the old name must be gone
+ $CHECKSTAT $DIR/$tfile-2 ||
+ error "$CHECKSTAT $DIR/$tfile-2 attribute check failed" # the new name must exist
+ rm $DIR/$tfile-2
+ return 0
}
run_test 10 "create |X| rename unlink"
test_11() {
- mcreate $DIR/$tfile
- echo "old" > $DIR/$tfile
- mv $DIR/$tfile $DIR/$tfile-2
- replay_barrier $SINGLEMDS
- echo "new" > $DIR/$tfile
- grep new $DIR/$tfile
- grep old $DIR/$tfile-2
- fail $SINGLEMDS
- grep new $DIR/$tfile || return 1
- grep old $DIR/$tfile-2 || return 2
+ mcreate $DIR/$tfile || error "mcreate $DIR/$tfile failed"
+ echo "old" > $DIR/$tfile
+ mv $DIR/$tfile $DIR/$tfile-2 # the "old" content now lives under the -2 name, before the barrier
+ replay_barrier $SINGLEMDS
+ echo "new" > $DIR/$tfile # the original name is re-created after the barrier
+ grep new $DIR/$tfile # pre-failure greps only print; their status is not checked
+ grep old $DIR/$tfile-2
+ fail $SINGLEMDS
+ grep new $DIR/$tfile || error "grep $DIR/$tfile failed" # both contents must be intact after the failure
+ grep old $DIR/$tfile-2 || error "grep $DIR/$tfile-2 failed"
}
run_test 11 "create open write rename |X| create-old-name read"
test_12() {
- mcreate $DIR/$tfile
- multiop_bg_pause $DIR/$tfile o_tSc || return 3
- pid=$!
- rm -f $DIR/$tfile
- replay_barrier $SINGLEMDS
- kill -USR1 $pid
- wait $pid || return 1
+ mcreate $DIR/$tfile || error "mcreate $DIR/$tfile failed"
+ multiop_bg_pause $DIR/$tfile o_tSc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # PID of the background multiop; not declared local
+ rm -f $DIR/$tfile # unlinked while the background multiop is still paused
+ replay_barrier $SINGLEMDS
+ kill -USR1 $pid || error "multiop $pid not running" # new: a failed signal is now an error; USR1 presumably resumes multiop - confirm
+ wait $pid || error "multiop $pid failed"

- fail $SINGLEMDS
- [ -e $DIR/$tfile ] && return 2
- return 0
+ fail $SINGLEMDS
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist" # the unlinked file must not reappear
+ return 0
}
run_test 12 "open, unlink |X| close"
-
# 1777 - replay open after committed chmod that would make
# a regular open a failure
test_13() {
- mcreate $DIR/$tfile
- multiop_bg_pause $DIR/$tfile O_wc || return 3
- pid=$!
- chmod 0 $DIR/$tfile
- $CHECKSTAT -p 0 $DIR/$tfile
- replay_barrier $SINGLEMDS
- fail $SINGLEMDS
- kill -USR1 $pid
- wait $pid || return 1
+ mcreate $DIR/$tfile || error "mcreate $DIR/$tfile failed"
+ multiop_bg_pause $DIR/$tfile O_wc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # PID of the background multiop; not declared local
+ chmod 0 $DIR/$tfile # permissions are removed while the file is held open by multiop
+ $CHECKSTAT -p 0 $DIR/$tfile ||
+ error "$CHECKSTAT $DIR/$tfile attribute check failed" # new: the pre-failure permission check is now enforced
+ replay_barrier $SINGLEMDS
+ fail $SINGLEMDS
+ kill -USR1 $pid || error "multiop $pid not running" # USR1 presumably resumes the paused multiop - confirm
+ wait $pid || error "multiop $pid failed"

- $CHECKSTAT -s 1 -p 0 $DIR/$tfile || return 2
- rm $DIR/$tfile || return 4
- return 0
+ $CHECKSTAT -s 1 -p 0 $DIR/$tfile ||
+ error "second $CHECKSTAT $DIR/$tfile attribute check failed" # expects size 1 and mode 0 after the multiop finished
+ rm $DIR/$tfile || error "rm $DIR/$tfile failed"
+ return 0
}
run_test 13 "open chmod 0 |x| write close"
test_14() {
- multiop_bg_pause $DIR/$tfile O_tSc || return 4
- pid=$!
- rm -f $DIR/$tfile
- replay_barrier $SINGLEMDS
- kill -USR1 $pid || return 1
- wait $pid || return 2
+ multiop_bg_pause $DIR/$tfile O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # PID of the background multiop; not declared local
+ rm -f $DIR/$tfile # unlinked before the barrier while multiop is paused
+ replay_barrier $SINGLEMDS
+ kill -USR1 $pid || error "multiop $pid not running" # USR1 presumably resumes the paused multiop - confirm
+ wait $pid || error "multiop $pid failed"

- fail $SINGLEMDS
- [ -e $DIR/$tfile ] && return 3
- return 0
+ fail $SINGLEMDS
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist" # the unlinked file must not reappear
+ return 0
}
run_test 14 "open(O_CREAT), unlink |X| close"
test_15() {
- multiop_bg_pause $DIR/$tfile O_tSc || return 5
- pid=$!
- rm -f $DIR/$tfile
- replay_barrier $SINGLEMDS
- touch $DIR/g11 || return 1
- kill -USR1 $pid
- wait $pid || return 2
+ multiop_bg_pause $DIR/$tfile O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # PID of the background multiop; not declared local
+ rm -f $DIR/$tfile
+ replay_barrier $SINGLEMDS
+ touch $DIR/$tfile-1 || error "touch $DIR/$tfile-1 failed" # replaces the fixed name g11 with a name derived from $tfile
+ kill -USR1 $pid || error "multiop $pid not running" # new: a failed signal is now an error
+ wait $pid || error "multiop $pid failed"

- fail $SINGLEMDS
- [ -e $DIR/$tfile ] && return 3
- touch $DIR/h11 || return 4
- return 0
+ fail $SINGLEMDS
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist" # the unlinked file must not reappear
+ touch $DIR/$tfile-2 || error "touch $DIR/$tfile-2 failed" # replaces the fixed name h11; creation after the failure must work
+ return 0
}
run_test 15 "open(O_CREAT), unlink |X| touch new, close"
-
test_16() {
- replay_barrier $SINGLEMDS
- mcreate $DIR/$tfile
- munlink $DIR/$tfile
- mcreate $DIR/$tfile-2
- fail $SINGLEMDS
- [ -e $DIR/$tfile ] && return 1
- [ -e $DIR/$tfile-2 ] || return 2
- munlink $DIR/$tfile-2 || return 3
+ replay_barrier $SINGLEMDS
+ mcreate $DIR/$tfile # create, unlink and second create all happen after the barrier
+ munlink $DIR/$tfile
+ mcreate $DIR/$tfile-2
+ fail $SINGLEMDS
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist" # the unlinked file must stay gone
+ [ -e $DIR/$tfile-2 ] || error "file $DIR/$tfile-2 does not exist" # the second file must have survived
+ munlink $DIR/$tfile-2 || error "munlink $DIR/$tfile-2 failed"
}
run_test 16 "|X| open(O_CREAT), unlink, touch new, unlink new"
test_17() {
- replay_barrier $SINGLEMDS
- multiop_bg_pause $DIR/$tfile O_c || return 4
- pid=$!
- fail $SINGLEMDS
- kill -USR1 $pid || return 1
- wait $pid || return 2
- $CHECKSTAT -t file $DIR/$tfile || return 3
- rm $DIR/$tfile
+ replay_barrier $SINGLEMDS
+ multiop_bg_pause $DIR/$tfile O_c ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # PID of the background multiop; not declared local
+ fail $SINGLEMDS # the MDS is failed while multiop is still paused
+ kill -USR1 $pid || error "multiop $pid not running" # USR1 presumably resumes the paused multiop - confirm
+ wait $pid || error "multiop $pid failed"
+ $CHECKSTAT -t file $DIR/$tfile ||
+ error "$CHECKSTAT $DIR/$tfile attribute check failed" # the file must exist as a regular file afterwards
+ rm $DIR/$tfile
}
run_test 17 "|X| open(O_CREAT), |replay| close"
test_18() {
- replay_barrier $SINGLEMDS
- multiop_bg_pause $DIR/$tfile O_tSc || return 8
- pid=$!
- rm -f $DIR/$tfile
- touch $DIR/$tfile-2 || return 1
- echo "pid: $pid will close"
- kill -USR1 $pid
- wait $pid || return 2
-
- fail $SINGLEMDS
- [ -e $DIR/$tfile ] && return 3
- [ -e $DIR/$tfile-2 ] || return 4
- # this touch frequently fails
- touch $DIR/$tfile-3 || return 5
- munlink $DIR/$tfile-2 || return 6
- munlink $DIR/$tfile-3 || return 7
- return 0
+ replay_barrier $SINGLEMDS
+ multiop_bg_pause $DIR/$tfile O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # PID of the background multiop; not declared local
+ rm -f $DIR/$tfile
+ touch $DIR/$tfile-2 || error "touch $DIR/$tfile-2 failed"
+ echo "pid: $pid will close"
+ kill -USR1 $pid || error "multiop $pid not running" # new: a failed signal is now an error
+ wait $pid || error "multiop $pid failed"
+
+ fail $SINGLEMDS
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist" # the unlinked file must stay gone
+ [ -e $DIR/$tfile-2 ] || error "file $DIR/$tfile-2 does not exist" # the file touched before the failure must exist
+ # this touch frequently fails
+ touch $DIR/$tfile-3 || error "touch $DIR/$tfile-3 failed"
+ munlink $DIR/$tfile-2 || error "munlink $DIR/$tfile-2 failed"
+ munlink $DIR/$tfile-3 || error "munlink $DIR/$tfile-3 failed"
+ return 0
}
-run_test 18 "|X| open(O_CREAT), unlink, touch new, close, touch, unlink"
+run_test 18 "open(O_CREAT), unlink, touch new, close, touch, unlink" # NOTE(review): the "|X|" marker was dropped although the barrier is still the first step - confirm intended
# bug 1855 (a simpler form of test_11 above)
test_19() {
- replay_barrier $SINGLEMDS
- mcreate $DIR/$tfile
- echo "old" > $DIR/$tfile
- mv $DIR/$tfile $DIR/$tfile-2
- grep old $DIR/$tfile-2
- fail $SINGLEMDS
- grep old $DIR/$tfile-2 || return 2
+ replay_barrier $SINGLEMDS
+ mcreate $DIR/$tfile # create, write and rename all happen after the barrier
+ echo "old" > $DIR/$tfile
+ mv $DIR/$tfile $DIR/$tfile-2
+ grep old $DIR/$tfile-2 # pre-failure grep only prints; its status is not checked
+ fail $SINGLEMDS
+ grep old $DIR/$tfile-2 || error "grep $DIR/$tfile-2 failed" # the content must be readable under the new name
}
-run_test 19 "|X| mcreate, open, write, rename "
+run_test 19 "mcreate, open, write, rename " # NOTE(review): the "|X|" marker was dropped although the barrier is still the first step - confirm intended
test_20a() { # was test_20
- replay_barrier $SINGLEMDS
- multiop_bg_pause $DIR/$tfile O_tSc || return 3
- pid=$!
- rm -f $DIR/$tfile
+ replay_barrier $SINGLEMDS
+ multiop_bg_pause $DIR/$tfile O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # PID of the background multiop; not declared local
+ rm -f $DIR/$tfile # unlinked while multiop is paused

- fail $SINGLEMDS
- kill -USR1 $pid
- wait $pid || return 1
- [ -e $DIR/$tfile ] && return 2
- return 0
+ fail $SINGLEMDS
+ kill -USR1 $pid || error "multiop $pid not running" # multiop is resumed only after the failure
+ wait $pid || error "multiop $pid failed"
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist" # the unlinked file must not reappear
+ return 0
}
run_test 20a "|X| open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)"
test_20b() { # bug 10480
- BEFOREUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'`
+ local wait_timeout=$((TIMEOUT * 4)) # passed to wait_delete_completed_mds below
+ local BEFOREUSED # both counters are now function-local
+ local AFTERUSED
+
+ BEFOREUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }') # third field of the last df line, taken before writing
+ dd if=/dev/zero of=$DIR/$tfile bs=4k count=10000 & # background writer; its PID is no longer recorded
+ while [ ! -e $DIR/$tfile ] ; do # NOTE(review): loops without a bound if dd never creates the file
+ usleep 60 # give dd a chance to start
+ done

- dd if=/dev/zero of=$DIR/$tfile bs=4k count=10000 &
- pid=$!
- while [ ! -e $DIR/$tfile ] ; do
- usleep 60 # give dd a chance to start
- done
+ $GETSTRIPE $DIR/$tfile || error "$GETSTRIPE $DIR/$tfile failed"
+ # make it an orphan
+ rm -f $DIR/$tfile || error "rm -f $DIR/$tfile failed"
+ mds_evict_client
+ client_up || client_up || true # reconnect

- $GETSTRIPE $DIR/$tfile || return 1
- rm -f $DIR/$tfile || return 2 # make it an orphan
- mds_evict_client
- client_up || client_up || true # reconnect
+ do_facet $SINGLEMDS "lctl set_param -n osd*.*MDT*.force_sync 1" # new step: sets force_sync on the MDT osd before the failure

- fail $SINGLEMDS # start orphan recovery
- wait_recovery_complete $SINGLEMDS || error "MDS recovery not done"
- wait_mds_ost_sync || return 3
- AFTERUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'`
- log "before $BEFOREUSED, after $AFTERUSED"
- [ $AFTERUSED -gt $((BEFOREUSED + 20)) ] && \
- error "after $AFTERUSED > before $BEFOREUSED"
- return 0
+ fail $SINGLEMDS # start orphan recovery
+ wait_recovery_complete $SINGLEMDS || error "MDS recovery not done"
+ wait_delete_completed_mds $wait_timeout ||
+ error "delete did not complete" # replaces the old wait_mds_ost_sync step
+
+ AFTERUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }') # same df field, read again after recovery
+ log "before $BEFOREUSED, after $AFTERUSED" # allowed growth below is fs_log_size (helper not visible here) instead of the fixed 20
+ (( $AFTERUSED > $BEFOREUSED + $(fs_log_size) )) &&
+ error "after $AFTERUSED > before $BEFOREUSED"
+ return 0
}
run_test 20b "write, unlink, eviction, replay, (test mds_cleanup_orphans)"
test_20c() { # bug 10480
- multiop_bg_pause $DIR/$tfile Ow_c || return 1
- pid=$!
+ multiop_bg_pause $DIR/$tfile Ow_c ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # PID of the background multiop; not declared local

- ls -la $DIR/$tfile
+ ls -la $DIR/$tfile # output only; the result is not checked

- mds_evict_client
- client_up || client_up || true # reconnect
+ mds_evict_client # this test evicts the client; it does not fail the MDS
+ client_up || client_up || true # reconnect

- kill -USR1 $pid
- wait $pid || return 1
- [ -s $DIR/$tfile ] || error "File was truncated"
+ kill -USR1 $pid || error "multiop $pid not running" # new: a failed signal is now an error
+ wait $pid || error "multiop $pid failed"
+ [ -s $DIR/$tfile ] || error "File was truncated" # the file must be non-empty after the eviction

- return 0
+ return 0
}
run_test 20c "check that client eviction does not affect file content"
test_21() {
- replay_barrier $SINGLEMDS
- multiop_bg_pause $DIR/$tfile O_tSc || return 5
- pid=$!
- rm -f $DIR/$tfile
- touch $DIR/g11 || return 1
-
- fail $SINGLEMDS
- kill -USR1 $pid
- wait $pid || return 2
- [ -e $DIR/$tfile ] && return 3
- touch $DIR/h11 || return 4
- return 0
+ replay_barrier $SINGLEMDS
+ multiop_bg_pause $DIR/$tfile O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # PID of the background multiop; not declared local
+ rm -f $DIR/$tfile
+ touch $DIR/$tfile-1 || error "touch $DIR/$tfile-1 failed" # replaces the fixed name g11 with a name derived from $tfile
+
+ fail $SINGLEMDS
+ kill -USR1 $pid || error "multiop $pid not running" # multiop is resumed only after the failure
+ wait $pid || error "multiop $pid failed"
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist" # the unlinked file must not reappear
+ touch $DIR/$tfile-2 || error "touch $DIR/$tfile-2 failed" # replaces the fixed name h11
+ return 0
}
run_test 21 "|X| open(O_CREAT), unlink touch new, replay, close (test mds_cleanup_orphans)"
test_22() {
- multiop_bg_pause $DIR/$tfile O_tSc || return 3
- pid=$!
+ multiop_bg_pause $DIR/$tfile O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # PID of the background multiop; not declared local

- replay_barrier $SINGLEMDS
- rm -f $DIR/$tfile
+ replay_barrier $SINGLEMDS # here the open happens before the barrier and the unlink after it
+ rm -f $DIR/$tfile

- fail $SINGLEMDS
- kill -USR1 $pid
- wait $pid || return 1
- [ -e $DIR/$tfile ] && return 2
- return 0
+ fail $SINGLEMDS
+ kill -USR1 $pid || error "multiop $pid not running" # multiop is resumed only after the failure
+ wait $pid || error "multiop $pid failed"
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist" # the unlinked file must not reappear
+ return 0
}
run_test 22 "open(O_CREAT), |X| unlink, replay, close (test mds_cleanup_orphans)"
test_23() {
- multiop_bg_pause $DIR/$tfile O_tSc || return 5
- pid=$!
-
- replay_barrier $SINGLEMDS
- rm -f $DIR/$tfile
- touch $DIR/g11 || return 1
+ multiop_bg_pause $DIR/$tfile O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # PID of the background multiop; not declared local

- fail $SINGLEMDS
- kill -USR1 $pid
- wait $pid || return 2
- [ -e $DIR/$tfile ] && return 3
- touch $DIR/h11 || return 4
- return 0
+ replay_barrier $SINGLEMDS # the open happens before the barrier; unlink and touch after it
+ rm -f $DIR/$tfile
+ touch $DIR/$tfile-1 || error "touch $DIR/$tfile-1 failed" # replaces the fixed name g11 with a name derived from $tfile
+
+ fail $SINGLEMDS
+ kill -USR1 $pid || error "multiop $pid not running" # multiop is resumed only after the failure
+ wait $pid || error "multiop $pid failed"
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist" # the unlinked file must not reappear
+ touch $DIR/$tfile-2 || error "touch $DIR/$tfile-2 failed" # replaces the fixed name h11
+ return 0
}
run_test 23 "open(O_CREAT), |X| unlink touch new, replay, close (test mds_cleanup_orphans)"
test_24() {
- multiop_bg_pause $DIR/$tfile O_tSc || return 3
- pid=$!
+ multiop_bg_pause $DIR/$tfile O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # PID of the background multiop; not declared local

- replay_barrier $SINGLEMDS
- fail $SINGLEMDS
- rm -f $DIR/$tfile
- kill -USR1 $pid
- wait $pid || return 1
- [ -e $DIR/$tfile ] && return 2
- return 0
+ replay_barrier $SINGLEMDS
+ fail $SINGLEMDS # nothing happens between the barrier and the failure in this test
+ rm -f $DIR/$tfile # the unlink is done after the failure
+ kill -USR1 $pid || error "multiop $pid not running" # new: a failed signal is now an error
+ wait $pid || error "multiop $pid failed"
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist" # the unlinked file must not reappear
+ return 0
}
run_test 24 "open(O_CREAT), replay, unlink, close (test mds_cleanup_orphans)"
test_25() {
- multiop_bg_pause $DIR/$tfile O_tSc || return 3
- pid=$!
- rm -f $DIR/$tfile
+ multiop_bg_pause $DIR/$tfile O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # PID of the background multiop; not declared local
+ rm -f $DIR/$tfile # both open and unlink happen before the barrier

- replay_barrier $SINGLEMDS
- fail $SINGLEMDS
- kill -USR1 $pid
- wait $pid || return 1
- [ -e $DIR/$tfile ] && return 2
- return 0
+ replay_barrier $SINGLEMDS
+ fail $SINGLEMDS
+ kill -USR1 $pid || error "multiop $pid not running" # multiop is resumed only after the failure
+ wait $pid || error "multiop $pid failed"
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist" # the unlinked file must not reappear
+ return 0
}
run_test 25 "open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)"
test_26() {
- replay_barrier $SINGLEMDS
- multiop_bg_pause $DIR/$tfile-1 O_tSc || return 5
- pid1=$!
- multiop_bg_pause $DIR/$tfile-2 O_tSc || return 6
- pid2=$!
- rm -f $DIR/$tfile-1
- rm -f $DIR/$tfile-2
- kill -USR1 $pid2
- wait $pid2 || return 1
-
- fail $SINGLEMDS
- kill -USR1 $pid1
- wait $pid1 || return 2
- [ -e $DIR/$tfile-1 ] && return 3
- [ -e $DIR/$tfile-2 ] && return 4
- return 0
+ replay_barrier $SINGLEMDS
+ multiop_bg_pause $DIR/$tfile-1 O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile-1 failed"
+ pid1=$! # PID of the first background multiop; not declared local
+ multiop_bg_pause $DIR/$tfile-2 O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile-2 failed"
+ pid2=$! # PID of the second background multiop; not declared local
+ rm -f $DIR/$tfile-1
+ rm -f $DIR/$tfile-2
+ kill -USR1 $pid2 || error "second multiop $pid2 not running" # the second multiop is resumed before the failure
+ wait $pid2 || error "second multiop $pid2 failed"
+
+ fail $SINGLEMDS
+ kill -USR1 $pid1 || error "multiop $pid1 not running" # the first multiop is resumed after the failure
+ wait $pid1 || error "multiop $pid1 failed"
+ [ -e $DIR/$tfile-1 ] && error "file $DIR/$tfile-1 should not exist" # neither unlinked file may reappear
+ [ -e $DIR/$tfile-2 ] && error "file $DIR/$tfile-2 should not exist"
+ return 0
}
run_test 26 "|X| open(O_CREAT), unlink two, close one, replay, close one (test mds_cleanup_orphans)"
test_27() {
- replay_barrier $SINGLEMDS
- multiop_bg_pause $DIR/$tfile-1 O_tSc || return 5
- pid1=$!
- multiop_bg_pause $DIR/$tfile-2 O_tSc || return 6
- pid2=$!
- rm -f $DIR/$tfile-1
- rm -f $DIR/$tfile-2
-
- fail $SINGLEMDS
- kill -USR1 $pid1
- wait $pid1 || return 1
- kill -USR1 $pid2
- wait $pid2 || return 2
- [ -e $DIR/$tfile-1 ] && return 3
- [ -e $DIR/$tfile-2 ] && return 4
- return 0
+ replay_barrier $SINGLEMDS
+ multiop_bg_pause $DIR/$tfile-1 O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile-1 failed"
+ pid1=$! # PID of the first background multiop; not declared local
+ multiop_bg_pause $DIR/$tfile-2 O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile-2 failed"
+ pid2=$! # PID of the second background multiop; not declared local
+ rm -f $DIR/$tfile-1
+ rm -f $DIR/$tfile-2
+
+ fail $SINGLEMDS
+ kill -USR1 $pid1 || error "multiop $pid1 not running" # both multiops are resumed after the failure
+ wait $pid1 || error "multiop $pid1 failed"
+ kill -USR1 $pid2 || error "second multiop $pid2 not running"
+ wait $pid2 || error "second multiop $pid2 failed"
+ [ -e $DIR/$tfile-1 ] && error "file $DIR/$tfile-1 should not exist" # neither unlinked file may reappear
+ [ -e $DIR/$tfile-2 ] && error "file $DIR/$tfile-2 should not exist"
+ return 0
}
run_test 27 "|X| open(O_CREAT), unlink two, replay, close two (test mds_cleanup_orphans)"
test_28() {
- multiop_bg_pause $DIR/$tfile-1 O_tSc || return 5
- pid1=$!
- multiop_bg_pause $DIR/$tfile-2 O_tSc || return 6
- pid2=$!
- replay_barrier $SINGLEMDS
- rm -f $DIR/$tfile-1
- rm -f $DIR/$tfile-2
- kill -USR1 $pid2
- wait $pid2 || return 1
-
- fail $SINGLEMDS
- kill -USR1 $pid1
- wait $pid1 || return 2
- [ -e $DIR/$tfile-1 ] && return 3
- [ -e $DIR/$tfile-2 ] && return 4
- return 0
+ multiop_bg_pause $DIR/$tfile-1 O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile-1 failed"
+ pid1=$! # PID of the first background multiop; not declared local
+ multiop_bg_pause $DIR/$tfile-2 O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile-2 failed"
+ pid2=$! # PID of the second background multiop; not declared local
+ replay_barrier $SINGLEMDS # both opens happen before the barrier; the unlinks after it
+ rm -f $DIR/$tfile-1
+ rm -f $DIR/$tfile-2
+ kill -USR1 $pid2 || error "second multiop $pid2 not running" # the second multiop is resumed before the failure
+ wait $pid2 || error "second multiop $pid2 failed"
+
+ fail $SINGLEMDS
+ kill -USR1 $pid1 || error "multiop $pid1 not running" # the first multiop is resumed after the failure
+ wait $pid1 || error "multiop $pid1 failed"
+ [ -e $DIR/$tfile-1 ] && error "file $DIR/$tfile-1 should not exist" # neither unlinked file may reappear
+ [ -e $DIR/$tfile-2 ] && error "file $DIR/$tfile-2 should not exist"
+ return 0
}
run_test 28 "open(O_CREAT), |X| unlink two, close one, replay, close one (test mds_cleanup_orphans)"
test_29() {
- multiop_bg_pause $DIR/$tfile-1 O_tSc || return 5
- pid1=$!
- multiop_bg_pause $DIR/$tfile-2 O_tSc || return 6
- pid2=$!
- replay_barrier $SINGLEMDS
- rm -f $DIR/$tfile-1
- rm -f $DIR/$tfile-2
-
- fail $SINGLEMDS
- kill -USR1 $pid1
- wait $pid1 || return 1
- kill -USR1 $pid2
- wait $pid2 || return 2
- [ -e $DIR/$tfile-1 ] && return 3
- [ -e $DIR/$tfile-2 ] && return 4
- return 0
+ multiop_bg_pause $DIR/$tfile-1 O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile-1 failed"
+ pid1=$! # PID of the first background multiop; not declared local
+ multiop_bg_pause $DIR/$tfile-2 O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile-2 failed"
+ pid2=$! # PID of the second background multiop; not declared local
+ replay_barrier $SINGLEMDS # both opens happen before the barrier; the unlinks after it
+ rm -f $DIR/$tfile-1
+ rm -f $DIR/$tfile-2
+
+ fail $SINGLEMDS
+ kill -USR1 $pid1 || error "multiop $pid1 not running" # both multiops are resumed after the failure
+ wait $pid1 || error "multiop $pid1 failed"
+ kill -USR1 $pid2 || error "second multiop $pid2 not running"
+ wait $pid2 || error "second multiop $pid2 failed"
+ [ -e $DIR/$tfile-1 ] && error "file $DIR/$tfile-1 should not exist" # neither unlinked file may reappear
+ [ -e $DIR/$tfile-2 ] && error "file $DIR/$tfile-2 should not exist"
+ return 0
}
run_test 29 "open(O_CREAT), |X| unlink two, replay, close two (test mds_cleanup_orphans)"
test_30() {
- multiop_bg_pause $DIR/$tfile-1 O_tSc || return 5
- pid1=$!
- multiop_bg_pause $DIR/$tfile-2 O_tSc || return 6
- pid2=$!
- rm -f $DIR/$tfile-1
- rm -f $DIR/$tfile-2
+ multiop_bg_pause $DIR/$tfile-1 O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile-1 failed"
+ pid1=$! # PID of the first background multiop; not declared local
+ multiop_bg_pause $DIR/$tfile-2 O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile-2 failed"
+ pid2=$! # PID of the second background multiop; not declared local
+ rm -f $DIR/$tfile-1 # both opens and both unlinks happen before the barrier
+ rm -f $DIR/$tfile-2

- replay_barrier $SINGLEMDS
- fail $SINGLEMDS
- kill -USR1 $pid1
- wait $pid1 || return 1
- kill -USR1 $pid2
- wait $pid2 || return 2
- [ -e $DIR/$tfile-1 ] && return 3
- [ -e $DIR/$tfile-2 ] && return 4
- return 0
+ replay_barrier $SINGLEMDS
+ fail $SINGLEMDS
+ kill -USR1 $pid1 || error "multiop $pid1 not running" # both multiops are resumed after the failure
+ wait $pid1 || error "multiop $pid1 failed"
+ kill -USR1 $pid2 || error "second multiop $pid2 not running"
+ wait $pid2 || error "second multiop $pid2 failed"
+ [ -e $DIR/$tfile-1 ] && error "file $DIR/$tfile-1 should not exist" # neither unlinked file may reappear
+ [ -e $DIR/$tfile-2 ] && error "file $DIR/$tfile-2 should not exist"
+ return 0
}
run_test 30 "open(O_CREAT) two, unlink two, replay, close two (test mds_cleanup_orphans)"
test_31() {
- multiop_bg_pause $DIR/$tfile-1 O_tSc || return 5
- pid1=$!
- multiop_bg_pause $DIR/$tfile-2 O_tSc || return 6
- pid2=$!
- rm -f $DIR/$tfile-1
+ multiop_bg_pause $DIR/$tfile-1 O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile-1 failed"
+ pid1=$! # PID of the first background multiop; not declared local
+ multiop_bg_pause $DIR/$tfile-2 O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile-2 failed"
+ pid2=$! # PID of the second background multiop; not declared local
+ rm -f $DIR/$tfile-1 # the first unlink happens before the barrier

- replay_barrier $SINGLEMDS
- rm -f $DIR/$tfile-2
- fail $SINGLEMDS
- kill -USR1 $pid1
- wait $pid1 || return 1
- kill -USR1 $pid2
- wait $pid2 || return 2
- [ -e $DIR/$tfile-1 ] && return 3
- [ -e $DIR/$tfile-2 ] && return 4
- return 0
+ replay_barrier $SINGLEMDS
+ rm -f $DIR/$tfile-2 # the second unlink happens after the barrier
+ fail $SINGLEMDS
+ kill -USR1 $pid1 || error "multiop $pid1 not running" # both multiops are resumed after the failure
+ wait $pid1 || error "multiop $pid1 failed"
+ kill -USR1 $pid2 || error "second multiop $pid2 not running"
+ wait $pid2 || error "second multiop $pid2 failed"
+ [ -e $DIR/$tfile-1 ] && error "file $DIR/$tfile-1 should not exist" # neither unlinked file may reappear
+ [ -e $DIR/$tfile-2 ] && error "file $DIR/$tfile-2 should not exist"
+ return 0
}
run_test 31 "open(O_CREAT) two, unlink one, |X| unlink one, close two (test mds_cleanup_orphans)"
# tests for bug 2104; completion without crashing is success. The close is
# stale, but we always return 0 for close, so the app never sees it.
test_32() {
- multiop_bg_pause $DIR/$tfile O_c || return 2
- pid1=$!
- multiop_bg_pause $DIR/$tfile O_c || return 3
- pid2=$!
- mds_evict_client
- client_up || client_up || return 1
- kill -USR1 $pid1
- kill -USR1 $pid2
- wait $pid1 || return 4
- wait $pid2 || return 5
- return 0
+ multiop_bg_pause $DIR/$tfile O_c ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid1=$! # PID of the first background multiop; not declared local
+ multiop_bg_pause $DIR/$tfile O_c ||
+ error "second multiop_bg_pause $DIR/$tfile failed" # both multiops operate on the same file
+ pid2=$! # PID of the second background multiop; not declared local
+ mds_evict_client # this test evicts the client; it does not fail the MDS
+ client_up || client_up || error "client_up failed" # client_up is attempted twice before giving up
+ kill -USR1 $pid1 || error "multiop $pid1 not running" # both multiops are signalled before either is waited for
+ kill -USR1 $pid2 || error "second multiop $pid2 not running"
+ wait $pid1 || error "multiop $pid1 failed"
+ wait $pid2 || error "second multiop $pid2 failed"
+ return 0
}
run_test 32 "close() notices client eviction; close() after client eviction"
test_33a() {
- createmany -o $DIR/$tfile-%d 10
- replay_barrier_nosync $SINGLEMDS
- fail_abort $SINGLEMDS
- # recreate shouldn't fail
- createmany -o $DIR/$tfile--%d 10 || return 1
- rm $DIR/$tfile-* -f
- return 0
+ createmany -o $DIR/$tfile-%d 10 ||
+ error "createmany create $DIR/$tfile failed"
+ replay_barrier_nosync $SINGLEMDS
+ fail_abort $SINGLEMDS
+ # recreate under new names (note the double dash in $tfile--%d) shouldn't fail
+ createmany -o $DIR/$tfile--%d 10 ||
+ error "createmany recreate $DIR/$tfile failed"
+ rm $DIR/$tfile-* -f # the glob also matches the $tfile--N files created above
+ return 0
}
run_test 33a "fid seq shouldn't be reused after abort recovery"
test_33b() {
- #define OBD_FAIL_SEQ_ALLOC 0x1311
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x1311"
-
- createmany -o $DIR/$tfile-%d 10
- replay_barrier_nosync $SINGLEMDS
- fail_abort $SINGLEMDS
- # recreate shouldn't fail
- createmany -o $DIR/$tfile--%d 10 || return 1
- rm $DIR/$tfile-* -f
- return 0
+ #define OBD_FAIL_SEQ_ALLOC 0x1311
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x1311" # fail_loc is not reset inside this function
+
+ createmany -o $DIR/$tfile-%d 10 # NOTE(review): exit status is not checked here, unlike test_33a - confirm this is intentional
+ replay_barrier_nosync $SINGLEMDS
+ fail_abort $SINGLEMDS
+ # recreate under new names (note the double dash in $tfile--%d) shouldn't fail
+ createmany -o $DIR/$tfile--%d 10 ||
+ error "createmany recreate $DIR/$tfile failed"
+ rm $DIR/$tfile-* -f # the glob also matches the $tfile--N files created above
+ return 0
}
run_test 33b "test fid seq allocation"
test_34() {
- multiop_bg_pause $DIR/$tfile O_c || return 2
- pid=$!
- rm -f $DIR/$tfile
+ multiop_bg_pause $DIR/$tfile O_c ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # most recent background job (the multiop, per the error text below)
+ rm -f $DIR/$tfile # unlink is issued before the replay barrier
- replay_barrier $SINGLEMDS
- fail_abort $SINGLEMDS
- kill -USR1 $pid
- wait $pid || return 3
- [ -e $DIR/$tfile ] && return 1
- sync
- return 0
+ replay_barrier $SINGLEMDS
+ fail_abort $SINGLEMDS
+ kill -USR1 $pid || error "multiop $pid not running"
+ wait $pid || error "multiop $pid failed"
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist"
+ sync
+ return 0 # explicit 0 regardless of the status of the commands above
}
run_test 34 "abort recovery before client does replay (test mds_cleanup_orphans)"
# bug 2278 - generate one orphan on OST, then destroy it during recovery from llog
test_35() {
- touch $DIR/$tfile
-
-#define OBD_FAIL_MDS_REINT_NET_REP 0x119
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000119"
- rm -f $DIR/$tfile &
- sleep 1
- sync
- sleep 1
- # give a chance to remove from MDS
- fail_abort $SINGLEMDS
- $CHECKSTAT -t file $DIR/$tfile && return 1 || true
+ touch $DIR/$tfile || error "touch $DIR/$tfile failed"
+
+ #define OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000119" # 0x119 with bit 0x80000000 set; NOTE(review): presumably a one-shot flag - confirm
+ rm -f $DIR/$tfile & # backgrounded; never waited on in this function
+ sleep 1
+ sync
+ sleep 1
+ # give a chance to remove from MDS
+ fail_abort $SINGLEMDS
+ $CHECKSTAT -t file $DIR/$tfile &&
+ error "$CHECKSTAT $DIR/$tfile attribute check should fail" ||
+ true # keeps the function status 0 when CHECKSTAT fails as expected
}
run_test 35 "test recovery from llog for unlink op"
# b=2432 resent cancel after replay uses wrong cookie,
# so don't resend cancels
test_36() {
- replay_barrier $SINGLEMDS
- touch $DIR/$tfile
- checkstat $DIR/$tfile
- facet_failover $SINGLEMDS
- cancel_lru_locks mdc
- if dmesg | grep "unknown lock cookie"; then
- echo "cancel after replay failed"
- return 1
- fi
+ replay_barrier $SINGLEMDS
+ touch $DIR/$tfile # exit status not checked
+ checkstat $DIR/$tfile # exit status not checked
+ facet_failover $SINGLEMDS
+ cancel_lru_locks mdc
+ if dmesg | grep "unknown lock cookie"; then # scans the whole local dmesg buffer; it is not cleared first in this function
+ error "cancel after replay failed"
+ fi
}
run_test 36 "don't resend cancel"
# b=2368
# directory orphans can't be unlinked from PENDING directory
test_37() {
- rmdir $DIR/$tfile 2>/dev/null
- multiop_bg_pause $DIR/$tfile dD_c || return 2
- pid=$!
- rmdir $DIR/$tfile
+ rmdir $DIR/$tfile 2>/dev/null # best-effort removal of a leftover directory; errors are discarded
+ multiop_bg_pause $DIR/$tfile dD_c ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # most recent background job (the multiop, per the error text below)
+ rmdir $DIR/$tfile # removed again after the multiop was started on it
- replay_barrier $SINGLEMDS
- # clear the dmesg buffer so we only see errors from this recovery
- do_facet $SINGLEMDS dmesg -c >/dev/null
- fail_abort $SINGLEMDS
- kill -USR1 $pid
- do_facet $SINGLEMDS dmesg | grep "error .* unlinking .* from PENDING" &&
- return 1
- wait $pid || return 3
- sync
- return 0
+ replay_barrier $SINGLEMDS
+ # clear the dmesg buffer so we only see errors from this recovery
+ do_facet $SINGLEMDS dmesg -c >/dev/null
+ fail_abort $SINGLEMDS
+ kill -USR1 $pid || error "multiop $pid not running"
+ do_facet $SINGLEMDS dmesg | grep "error .* unlinking .* from PENDING" &&
+ error "error unlinking files"
+ wait $pid || error "multiop $pid failed"
+ sync
+ return 0 # explicit 0 regardless of the status of the commands above
}
-start_full_debug_logging
run_test 37 "abort recovery before client does replay (test mds_cleanup_orphans for directories)"
-stop_full_debug_logging
test_38() {
- createmany -o $DIR/$tfile-%d 800
- unlinkmany $DIR/$tfile-%d 0 400
- replay_barrier $SINGLEMDS
- fail $SINGLEMDS
- unlinkmany $DIR/$tfile-%d 400 400
- sleep 2
- $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true
+ createmany -o $DIR/$tfile-%d 800 ||
+ error "createmany -o $DIR/$tfile failed"
+ unlinkmany $DIR/$tfile-%d 0 400 || error "unlinkmany $DIR/$tfile failed" # first batch is unlinked before the barrier
+ replay_barrier $SINGLEMDS
+ fail $SINGLEMDS
+ unlinkmany $DIR/$tfile-%d 400 400 ||
+ error "unlinkmany $DIR/$tfile 400 failed"
+ sleep 2
+ $CHECKSTAT -t file $DIR/$tfile-* &&
+ error "$CHECKSTAT $DIR/$tfile-* attribute check should fail" ||
+ true # keeps the function status 0 when CHECKSTAT fails as expected
}
run_test 38 "test recovery from unlink llog (test llog_gen_rec) "
test_39() { # bug 4176
- createmany -o $DIR/$tfile-%d 800
- replay_barrier $SINGLEMDS
- unlinkmany $DIR/$tfile-%d 0 400
- fail $SINGLEMDS
- unlinkmany $DIR/$tfile-%d 400 400
- sleep 2
- $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true
+ createmany -o $DIR/$tfile-%d 800 ||
+ error "createmany -o $DIR/$tfile failed"
+ replay_barrier $SINGLEMDS
+ unlinkmany $DIR/$tfile-%d 0 400 # first batch is unlinked after the barrier (test_38 does it before); status not checked
+ fail $SINGLEMDS
+ unlinkmany $DIR/$tfile-%d 400 400 ||
+ error "unlinkmany $DIR/$tfile 400 failed"
+ sleep 2
+ $CHECKSTAT -t file $DIR/$tfile-* &&
+ error "$CHECKSTAT $DIR/$tfile-* attribute check should fail" ||
+ true # keeps the function status 0 when CHECKSTAT fails as expected
}
run_test 39 "test recovery from unlink llog (test llog_gen_rec) "
#b=2477,2532
test_40(){
- $LCTL mark multiop $MOUNT/$tfile OS_c
- multiop $MOUNT/$tfile OS_c &
- PID=$!
- writeme -s $MOUNT/${tfile}-2 &
- WRITE_PID=$!
- sleep 1
- facet_failover $SINGLEMDS
-#define OBD_FAIL_MDS_CONNECT_NET 0x117
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000117"
- kill -USR1 $PID
- stat1=`count_ost_writes`
- sleep $TIMEOUT
- stat2=`count_ost_writes`
- echo "$stat1, $stat2"
- if [ $stat1 -lt $stat2 ]; then
- echo "writes continuing during recovery"
- RC=0
- else
- echo "writes not continuing during recovery, bug 2477"
- RC=4
- fi
- echo "waiting for writeme $WRITE_PID"
- kill $WRITE_PID
- wait $WRITE_PID
-
- echo "waiting for multiop $PID"
- wait $PID || return 2
- do_facet client munlink $MOUNT/$tfile || return 3
- do_facet client munlink $MOUNT/${tfile}-2 || return 3
- return $RC
+ # always need connection to MDS to verify layout during IO. LU-2628.
+ lctl get_param mdc.*.connect_flags | grep -q layout_lock &&
+ skip "layout_lock needs MDS connection for IO" && return 0
+
+ $LCTL mark "$HOSTNAME multiop $MOUNT/$tfile OS_c"
+ multiop $MOUNT/$tfile OS_c &
+ PID=$! # PID of the background multiop; not declared local
+ writeme -s $MOUNT/${tfile}-2 &
+ WRITE_PID=$! # PID of the background writeme; not declared local
+ sleep 1
+ facet_failover $SINGLEMDS
+ #define OBD_FAIL_MDS_CONNECT_NET 0x117
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000117" # 0x117 with bit 0x80000000 set
+ kill -USR1 $PID
+ stat1=$(count_ost_writes) # first sample
+ sleep $TIMEOUT
+ stat2=$(count_ost_writes) # second sample, taken $TIMEOUT seconds later
+ echo "$stat1, $stat2"
+ if [ $stat1 -lt $stat2 ]; then # unquoted: an empty sample makes [ fail, which takes the else branch
+ echo "writes continuing during recovery"
+ RC=0
+ else
+ echo "writes not continuing during recovery, bug 2477"
+ RC=4
+ fi
+ echo "waiting for writeme $WRITE_PID"
+ kill $WRITE_PID
+ wait $WRITE_PID # status ignored; the writer was just killed
+
+ echo "waiting for multiop $PID"
+ wait $PID || error "multiop $PID failed"
+ do_facet client munlink $MOUNT/$tfile ||
+ error "munlink $MOUNT/$tfile failed"
+ do_facet client munlink $MOUNT/${tfile}-2 ||
+ error "munlink $MOUNT/$tfile-2 failed"
+ return $RC # 0 if the write count grew between samples, 4 otherwise
}
run_test 40 "cause recovery in ptlrpc, ensure IO continues"
-
#b=2814
# make sure that a read to one osc doesn't try to double-unlock its page just
# because another osc is invalid. trigger_group_io used to mistakenly return
skip_env "skipping test 41: we don't have a second OST to test with" &&
return
- local f=$MOUNT/$tfile
- # make sure the start of the file is ost1
- $SETSTRIPE -S $((128 * 1024)) -i 0 $f
- do_facet client dd if=/dev/zero of=$f bs=4k count=1 || return 3
- cancel_lru_locks osc
- # fail ost2 and read from ost1
- local mdtosc=$(get_mdtosc_proc_path $SINGLEMDS $ost2_svc)
- local osc2dev=$(do_facet $SINGLEMDS "lctl get_param -n devices" | \
- grep $mdtosc | awk '{print $1}')
- [ -z "$osc2dev" ] && echo "OST: $ost2_svc" && lctl get_param -n devices &&
- return 4
- do_facet $SINGLEMDS $LCTL --device $osc2dev deactivate || return 1
- do_facet client dd if=$f of=/dev/null bs=4k count=1 || return 3
- do_facet $SINGLEMDS $LCTL --device $osc2dev activate || return 2
- return 0
+ local f=$MOUNT/$tfile
+ # make sure the start of the file is ost1
+ $SETSTRIPE -S $((128 * 1024)) -i 0 $f
+ do_facet client dd if=/dev/zero of=$f bs=4k count=1 ||
+ error "dd on client failed"
+ cancel_lru_locks osc
+ # fail ost2 and read from ost1
+ local mdtosc=$(get_mdtosc_proc_path $SINGLEMDS $ost2_svc)
+ local osc2dev=$(do_facet $SINGLEMDS "lctl get_param -n devices" |
+ grep $mdtosc | awk '{print $1}')
+ [ -z "$osc2dev" ] && echo "OST: $ost2_svc" &&
+ lctl get_param -n devices &&
+ error "OST 2 $osc2dev does not exist"
+ do_facet $SINGLEMDS $LCTL --device $osc2dev deactivate ||
+ error "deactive device on $SINGLEMDS failed"
+ do_facet client dd if=$f of=/dev/null bs=4k count=1 ||
+ error "second dd on client failed"
+ do_facet $SINGLEMDS $LCTL --device $osc2dev activate ||
+ error "active device on $SINGLEMDS failed"
+ return 0
}
run_test 41 "read from a valid osc while other oscs are invalid"
# test MDS recovery after ost failure
test_42() {
- blocks=`df -P $MOUNT | tail -n 1 | awk '{ print $2 }'`
- createmany -o $DIR/$tfile-%d 800
- replay_barrier ost1
- unlinkmany $DIR/$tfile-%d 0 400
- debugsave
- lctl set_param debug=-1
- facet_failover ost1
-
- # osc is evicted, fs is smaller (but only with failout OSTs (bug 7287)
- #blocks_after=`df -P $MOUNT | tail -n 1 | awk '{ print $2 }'`
- #[ $blocks_after -lt $blocks ] || return 1
- echo wait for MDS to timeout and recover
- sleep $((TIMEOUT * 2))
- debugrestore
- unlinkmany $DIR/$tfile-%d 400 400
- $CHECKSTAT -t file $DIR/$tfile-* && return 2 || true
+ blocks=$(df -P $MOUNT | tail -n 1 | awk '{ print $2 }') # column 2 of the last df line; only referenced by the commented-out check below
+ createmany -o $DIR/$tfile-%d 800 ||
+ error "createmany -o $DIR/$tfile failed"
+ replay_barrier ost1
+ unlinkmany $DIR/$tfile-%d 0 400 # status not checked
+ debugsave
+ lctl set_param debug=-1 # changed between debugsave and debugrestore
+ facet_failover ost1
+
+ # osc is evicted, fs is smaller (but only with failout OSTs (bug 7287)
+ #blocks_after=`df -P $MOUNT | tail -n 1 | awk '{ print $2 }'`
+ #[ $blocks_after -lt $blocks ] || return 1
+ echo "wait for MDS to timeout and recover"
+ sleep $((TIMEOUT * 2))
+ debugrestore
+ unlinkmany $DIR/$tfile-%d 400 400 ||
+ error "unlinkmany $DIR/$tfile 400 failed"
+ $CHECKSTAT -t file $DIR/$tfile-* &&
+ error "$CHECKSTAT $DIR/$tfile-* attribute check should fail" ||
+ true # keeps the function status 0 when CHECKSTAT fails as expected
}
run_test 42 "recovery after ost failure"
# timeout in MDS/OST recovery RPC will LBUG MDS
test_43() { # bug 2530
- remote_ost_nodsh && skip "remote OST with nodsh" && return 0
+ remote_ost_nodsh && skip "remote OST with nodsh" && return 0
- replay_barrier $SINGLEMDS
+ replay_barrier $SINGLEMDS
- # OBD_FAIL_OST_CREATE_NET 0x204
- do_facet ost1 "lctl set_param fail_loc=0x80000204"
- fail $SINGLEMDS
- sleep 10
- do_facet ost1 "lctl set_param fail_loc=0"
+ # OBD_FAIL_OST_CREATE_NET 0x204
+ do_facet ost1 "lctl set_param fail_loc=0x80000204" # armed on ost1 before the MDS is failed
+ fail $SINGLEMDS
+ sleep 10 # NOTE(review): fail_loc on ost1 is no longer reset to 0 here - confirm the framework clears it
- return 0
+ return 0
}
run_test 43 "mds osc import failure during recovery; don't LBUG"
test_44a() { # was test_44
- local at_max_saved=0
-
- mdcdev=`lctl get_param -n devices | awk '/MDT0000-mdc-/ {print $1}'`
- [ "$mdcdev" ] || return 2
- [ $(echo $mdcdev | wc -w) -eq 1 ] || { echo $mdcdev=$mdcdev && return 3; }
-
- # adaptive timeouts slow this way down
- if at_is_enabled; then
- at_max_saved=$(at_max_get mds)
- at_max_set 40 mds
- fi
-
- for i in `seq 1 10`; do
- echo "$i of 10 ($(date +%s))"
- do_facet $SINGLEMDS "lctl get_param -n mdt.*.mdt.timeouts | grep service"
- #define OBD_FAIL_TGT_CONN_RACE 0x701
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000701"
- # lctl below may fail, it is valid case
- $LCTL --device $mdcdev recover
- df $MOUNT
- done
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
- [ $at_max_saved -ne 0 ] && at_max_set $at_max_saved mds
- return 0
+ local at_max_saved=0 # stays 0 unless adaptive timeouts are enabled below
+
+ local mdcdev=$($LCTL dl |
+ awk "/${FSNAME}-MDT0000-mdc-/ {if (\$2 == \"UP\") {print \$1}}")
+ [ "$mdcdev" ] || error "${FSNAME}-MDT0000-mdc- not UP" # awk printed column 1 of matching lines whose column 2 is UP
+ [ $(echo $mdcdev | wc -w) -eq 1 ] ||
+ { $LCTL dl; error "looking for mdcdev=$mdcdev"; } # exactly one matching device is required
+
+ # adaptive timeouts slow this way down
+ if at_is_enabled; then
+ at_max_saved=$(at_max_get mds)
+ at_max_set 40 mds
+ fi
+
+ for i in $(seq 1 10); do
+ echo "$i of 10 ($(date +%s))"
+ do_facet $SINGLEMDS \
+ "lctl get_param -n md[ts].*.mdt.timeouts | grep service"
+ #define OBD_FAIL_TGT_CONN_RACE 0x701
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000701" # re-armed on every iteration
+ # lctl below may fail, it is valid case
+ $LCTL --device $mdcdev recover
+ df $MOUNT
+ done
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0"
+ [ $at_max_saved -ne 0 ] && at_max_set $at_max_saved mds # restore only if a value was saved above
+ return 0
}
run_test 44a "race in target handle connect"
test_44b() {
- local mdcdev=`lctl get_param -n devices | awk '/MDT0000-mdc-/ {print $1}'`
- [ "$mdcdev" ] || return 2
- [ $(echo $mdcdev | wc -w) -eq 1 ] || { echo $mdcdev=$mdcdev && return 3; }
-
- for i in `seq 1 10`; do
- echo "$i of 10 ($(date +%s))"
- do_facet $SINGLEMDS "lctl get_param -n mdt.*.mdt.timeouts | grep service"
- #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000704"
- # lctl below may fail, it is valid case
- $LCTL --device $mdcdev recover
- df $MOUNT
- done
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
- return 0
+ local mdcdev=$($LCTL dl |
+ awk "/${FSNAME}-MDT0000-mdc-/ {if (\$2 == \"UP\") {print \$1}}")
+ [ "$mdcdev" ] || error "${FSNAME}-MDT0000-mdc not up" # awk printed column 1 of matching lines whose column 2 is UP
+ [ $(echo $mdcdev | wc -w) -eq 1 ] ||
+ { echo mdcdev=$mdcdev; $LCTL dl;
+ error "more than one ${FSNAME}-MDT0000-mdc"; }
+
+ for i in $(seq 1 10); do
+ echo "$i of 10 ($(date +%s))"
+ do_facet $SINGLEMDS \
+ "lctl get_param -n md[ts].*.mdt.timeouts | grep service"
+ #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000704" # re-armed on every iteration
+ # lctl below may fail, it is valid case
+ $LCTL --device $mdcdev recover
+ df $MOUNT
+ done
+ return 0 # NOTE(review): fail_loc is no longer reset to 0 after the loop - confirm the framework clears it
}
run_test 44b "race in target handle connect"
test_44c() {
- replay_barrier $SINGLEMDS
- createmany -m $DIR/$tfile-%d 100
-#define OBD_FAIL_TGT_RCVG_FLAG 0x712
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000712"
- fail_abort $SINGLEMDS
- unlinkmany $DIR/$tfile-%d 100 && return 1
- fail $SINGLEMDS
- unlinkmany $DIR/$tfile-%d 100 && return 1
- return 0
+ replay_barrier $SINGLEMDS
+ createmany -m $DIR/$tfile-%d 100 || error "failed to create directories"
+ #define OBD_FAIL_TGT_RCVG_FLAG 0x712
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000712" # 0x712 with bit 0x80000000 set
+ fail_abort $SINGLEMDS
+ unlinkmany $DIR/$tfile-%d 100 && error "unlinked after fail abort" # the unlink is expected to fail here
+ fail $SINGLEMDS
+ unlinkmany $DIR/$tfile-%d 100 && error "unlinked after fail" # and to fail again after the second failover
+ return 0 # explicit 0: the expected unlinkmany failure above leaves a non-zero status
}
run_test 44c "race in target handle connect"
# Handle failed close
test_45() {
- mdcdev=`lctl get_param -n devices | awk '/MDT0000-mdc-/ {print $1}'`
- [ "$mdcdev" ] || return 2
- [ $(echo $mdcdev | wc -w) -eq 1 ] || { echo $mdcdev=$mdcdev && return 3; }
-
- $LCTL --device $mdcdev recover || return 6
-
- multiop_bg_pause $DIR/$tfile O_c || return 1
- pid=$!
+ local mdcdev=$($LCTL get_param -n devices |
+ awk "/ ${FSNAME}-MDT0000-mdc-/ {print \$1}")
+ [ "$mdcdev" ] || error "${FSNAME}-MDT0000-mdc not up" # NOTE(review): unlike test_44a/44b, the awk filter here does not test for UP
+ [ $(echo $mdcdev | wc -w) -eq 1 ] ||
+ { echo mdcdev=$mdcdev; $LCTL dl;
+ error "more than one ${FSNAME}-MDT0000-mdc"; }
+
+ $LCTL --device $mdcdev recover ||
+ error "$LCTL --device $mdcdev recover failed"
+
+ multiop_bg_pause $DIR/$tfile O_c ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$! # most recent background job (the multiop, per the error text below)
- # This will cause the CLOSE to fail before even
- # allocating a reply buffer
- $LCTL --device $mdcdev deactivate || return 4
+ # This will cause the CLOSE to fail before even
+ # allocating a reply buffer
+ $LCTL --device $mdcdev deactivate ||
+ error "$LCTL --device $mdcdev deactivate failed"
- # try the close
- kill -USR1 $pid
- wait $pid || return 1
+ # try the close
+ kill -USR1 $pid || error "multiop $pid not running"
+ wait $pid || error "multiop $pid failed"
- $LCTL --device $mdcdev activate || return 5
- sleep 1
+ $LCTL --device $mdcdev activate ||
+ error "$LCTL --device $mdcdev activate failed"
+ sleep 1
- $CHECKSTAT -t file $DIR/$tfile || return 2
- return 0
+ $CHECKSTAT -t file $DIR/$tfile ||
+ error "$CHECKSTAT $DIR/$tfile attribute check failed"
+ return 0
}
run_test 45 "Handle failed close"
test_46() {
- dmesg -c >/dev/null
- drop_reply "touch $DIR/$tfile"
- fail $SINGLEMDS
- # ironically, the previous test, 45, will cause a real forced close,
- # so just look for one for this test
- dmesg | grep -i "force closing client file handle for $tfile" && return 1
- return 0
+ dmesg -c >/dev/null # clear the local dmesg buffer so the grep below only sees new messages
+ drop_reply "touch $DIR/$tfile"
+ fail $SINGLEMDS
+ # ironically, the previous test, 45, will cause a real forced close,
+ # so just look for one for this test
+ dmesg | grep -i "force closing client file handle for $tfile" &&
+ error "found force closing in dmesg"
+ return 0 # explicit 0: grep finding nothing leaves a non-zero status
}
run_test 46 "Don't leak file handle after open resend (3325)"
test_47() { # bug 2824
- remote_ost_nodsh && skip "remote OST with nodsh" && return 0
+ remote_ost_nodsh && skip "remote OST with nodsh" && return 0
- # create some files to make sure precreate has been done on all
- # OSTs. (just in case this test is run independently)
- createmany -o $DIR/$tfile 20 || return 1
+ # create some files to make sure precreate has been done on all
+ # OSTs. (just in case this test is run independently)
+ createmany -o $DIR/$tfile 20 ||
+ error "createmany create $DIR/$tfile failed"
- # OBD_FAIL_OST_CREATE_NET 0x204
- fail ost1
- do_facet ost1 "lctl set_param fail_loc=0x80000204"
- client_up || return 2
+ # OBD_FAIL_OST_CREATE_NET 0x204
+ fail ost1
+ do_facet ost1 "lctl set_param fail_loc=0x80000204" # armed after ost1 has been failed over
+ client_up || error "client_up failed"
- # let the MDS discover the OST failure, attempt to recover, fail
- # and recover again.
- sleep $((3 * TIMEOUT))
+ # let the MDS discover the OST failure, attempt to recover, fail
+ # and recover again.
+ sleep $((3 * TIMEOUT))
- # Without 2824, this createmany would hang
- createmany -o $DIR/$tfile 20 || return 3
- unlinkmany $DIR/$tfile 20 || return 4
+ # Without 2824, this createmany would hang
+ createmany -o $DIR/$tfile 20 ||
+ error "createmany recreate $DIR/$tfile failed"
+ unlinkmany $DIR/$tfile 20 || error "unlinkmany $DIR/$tfile failed"
- do_facet ost1 "lctl set_param fail_loc=0"
- return 0
+ return 0 # NOTE(review): fail_loc on ost1 is no longer reset to 0 here - confirm the framework clears it
}
run_test 47 "MDS->OSC failure during precreate cleanup (2824)"
test_48() {
- remote_ost_nodsh && skip "remote OST with nodsh" && return 0
- [ "$OSTCOUNT" -lt "2" ] && skip_env "$OSTCOUNT < 2 OSTs -- skipping" && return
+ remote_ost_nodsh && skip "remote OST with nodsh" && return 0
+ [ "$OSTCOUNT" -lt "2" ] &&
+ skip_env "$OSTCOUNT < 2 OSTs -- skipping" && return
- replay_barrier $SINGLEMDS
- createmany -o $DIR/$tfile 20 || return 1
- # OBD_FAIL_OST_EROFS 0x216
- facet_failover $SINGLEMDS
- do_facet ost1 "lctl set_param fail_loc=0x80000216"
- client_up || return 2
-
- createmany -o $DIR/$tfile 20 20 || return 2
- unlinkmany $DIR/$tfile 40 || return 3
- return 0
+ replay_barrier $SINGLEMDS
+ createmany -o $DIR/$tfile 20 ||
+ error "createmany -o $DIR/$tfile failed"
+ # OBD_FAIL_OST_EROFS 0x216
+ facet_failover $SINGLEMDS
+ do_facet ost1 "lctl set_param fail_loc=0x80000216" # armed on ost1 after the MDS failover
+ client_up || error "client_up failed"
+
+ createmany -o $DIR/$tfile 20 20 ||
+ error "createmany recreate $DIR/$tfile failed"
+ unlinkmany $DIR/$tfile 40 || error "unlinkmany $DIR/$tfile failed" # removes the files from both createmany calls
+ return 0
}
run_test 48 "MDS->OSC failure during precreate cleanup (2824)"
test_50() {
- local mdtosc=$(get_mdtosc_proc_path $SINGLEMDS $ost1_svc)
- local oscdev=$(do_facet $SINGLEMDS "lctl get_param -n devices" | \
- grep $mdtosc | awk '{print $1}')
- [ "$oscdev" ] || return 1
- do_facet $SINGLEMDS $LCTL --device $oscdev recover || return 2
- do_facet $SINGLEMDS $LCTL --device $oscdev recover || return 3
- # give the mds_lov_sync threads a chance to run
- sleep 5
+ local mdtosc=$(get_mdtosc_proc_path $SINGLEMDS $ost1_svc)
+ local oscdev=$(do_facet $SINGLEMDS "lctl get_param -n devices" |
+ grep $mdtosc | awk '{print $1}')
+ [ "$oscdev" ] || error "could not find OSC device on MDS" # oscdev is column 1 of the device line matching $mdtosc
+ do_facet $SINGLEMDS $LCTL --device $oscdev recover ||
+ error "OSC device $oscdev recovery failed"
+ do_facet $SINGLEMDS $LCTL --device $oscdev recover ||
+ error "second OSC device $oscdev recovery failed"
+ # give the mds_lov_sync threads a chance to run
+ sleep 5
}
run_test 50 "Double OSC recovery, don't LASSERT (3812)"
# b3764 timed out lock replay
test_52() {
- touch $DIR/$tfile
- cancel_lru_locks mdc
+ [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.90) ] &&
+ skip "MDS prior to 2.6.90 handle LDLM_REPLY_NET incorrectly" &&
+ return 0
- multiop $DIR/$tfile s || return 1
- replay_barrier $SINGLEMDS
-#define OBD_FAIL_LDLM_REPLY 0x30c
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000030c"
- fail $SINGLEMDS || return 2
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
+ touch $DIR/$tfile || error "touch $DIR/$tfile failed"
+ cancel_lru_locks mdc
+
+ multiop_bg_pause $DIR/$tfile s_s || error "multiop $DIR/$tfile failed"
+ mpid=$! # most recent background job (the multiop); not declared local
+
+ #define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157
+ lctl set_param -n ldlm.cancel_unused_locks_before_replay "0" # set on the local node; put back to "1" at the end
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000157" # 0x157 with bit 0x80000000 set
- $CHECKSTAT -t file $DIR/$tfile-* && return 3 || true
+ fail $SINGLEMDS || error "fail $SINGLEMDS failed"
+ kill -USR1 $mpid
+ wait $mpid || error "multiop_bg_pause pid failed"
+
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
+ lctl set_param fail_loc=0x0 # also cleared on the local node
+ lctl set_param -n ldlm.cancel_unused_locks_before_replay "1" # NOTE(review): only reached on the success path if error aborts the test - confirm
+ rm -f $DIR/$tfile
}
run_test 52 "time out lock replay (3764)"
# bug 3462 - simultaneous MDC requests
test_53a() {
- cancel_lru_locks mdc # cleanup locks from former test cases
- mkdir -p $DIR/${tdir}-1
- mkdir -p $DIR/${tdir}-2
- multiop $DIR/${tdir}-1/f O_c &
- close_pid=$!
- # give multiop a change to open
- sleep 1
-
- #define OBD_FAIL_MDS_CLOSE_NET 0x115
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000115"
- kill -USR1 $close_pid
- cancel_lru_locks mdc # force the close
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
-
- mcreate $DIR/${tdir}-2/f || return 1
-
- # close should still be here
- [ -d /proc/$close_pid ] || return 2
-
- replay_barrier_nodf $SINGLEMDS
- fail $SINGLEMDS
- wait $close_pid || return 3
-
- $CHECKSTAT -t file $DIR/${tdir}-1/f || return 4
- $CHECKSTAT -t file $DIR/${tdir}-2/f || return 5
- rm -rf $DIR/${tdir}-*
+ [[ $(lctl get_param mdc.*.import |
+ grep "connect_flags:.*multi_mod_rpc") ]] ||
+ { skip "Need MDC with 'multi_mod_rpcs' feature"; return 0; }
+
+ cancel_lru_locks mdc # cleanup locks from former test cases
+ mkdir $DIR/${tdir}-1 || error "mkdir $DIR/${tdir}-1 failed" # no -p: an already existing directory is now an error
+ mkdir $DIR/${tdir}-2 || error "mkdir $DIR/${tdir}-2 failed"
+ multiop $DIR/${tdir}-1/f O_c &
+ close_pid=$! # PID of the background multiop
+ # give multiop a chance to open
+ sleep 1
+
+ #define OBD_FAIL_MDS_CLOSE_NET 0x115
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000115"
+ kill -USR1 $close_pid
+ cancel_lru_locks mdc # force the close
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0"
+
+ mcreate $DIR/${tdir}-2/f || error "mcreate $DIR/${tdir}-2/f failed"
+
+ # close should still be here
+ [ -d /proc/$close_pid ] || error "close_pid doesn't exist" # /proc/<pid> exists while the process is alive
+
+ replay_barrier_nodf $SINGLEMDS
+ fail $SINGLEMDS
+ wait $close_pid || error "close_pid $close_pid failed"
+
+ $CHECKSTAT -t file $DIR/${tdir}-1/f ||
+ error "$CHECKSTAT $DIR/${tdir}-1/f attribute check failed"
+ $CHECKSTAT -t file $DIR/${tdir}-2/f ||
+ error "$CHECKSTAT $DIR/${tdir}-2/f attribute check failed"
+ rm -rf $DIR/${tdir}-*
}
run_test 53a "|X| close request while two MDC requests in flight"
test_53b() {
- cancel_lru_locks mdc # cleanup locks from former test cases
- rm -rf $DIR/${tdir}-1 $DIR/${tdir}-2
-
- mkdir -p $DIR/${tdir}-1
- mkdir -p $DIR/${tdir}-2
- multiop_bg_pause $DIR/${tdir}-1/f O_c || return 6
- close_pid=$!
-
- #define OBD_FAIL_MDS_REINT_NET 0x107
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000107"
- mcreate $DIR/${tdir}-2/f &
- open_pid=$!
- sleep 1
-
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
- kill -USR1 $close_pid
- cancel_lru_locks mdc # force the close
- wait $close_pid || return 1
- # open should still be here
- [ -d /proc/$open_pid ] || return 2
-
- replay_barrier_nodf $SINGLEMDS
- fail $SINGLEMDS
- wait $open_pid || return 3
-
- $CHECKSTAT -t file $DIR/${tdir}-1/f || return 4
- $CHECKSTAT -t file $DIR/${tdir}-2/f || return 5
- rm -rf $DIR/${tdir}-*
+ cancel_lru_locks mdc # cleanup locks from former test cases
+
+ mkdir $DIR/${tdir}-1 || error "mkdir $DIR/${tdir}-1 failed" # no -p and no prior rm -rf: an existing directory is now an error
+ mkdir $DIR/${tdir}-2 || error "mkdir $DIR/${tdir}-2 failed"
+ multiop_bg_pause $DIR/${tdir}-1/f O_c ||
+ error "multiop_bg_pause $DIR/${tdir}-1/f failed"
+ close_pid=$! # most recent background job (the paused multiop)
+
+ #define OBD_FAIL_MDS_REINT_NET 0x107
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000107"
+ mcreate $DIR/${tdir}-2/f &
+ open_pid=$! # PID of the background mcreate
+ sleep 1
+
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0"
+ kill -USR1 $close_pid
+ cancel_lru_locks mdc # force the close
+ wait $close_pid || error "close_pid $close_pid failed"
+ # open should still be here
+ [ -d /proc/$open_pid ] || error "open_pid doesn't exist" # /proc/<pid> exists while the process is alive
+
+ replay_barrier_nodf $SINGLEMDS
+ fail $SINGLEMDS
+ wait $open_pid || error "open_pid failed"
+
+ $CHECKSTAT -t file $DIR/${tdir}-1/f ||
+ error "$CHECKSTAT $DIR/${tdir}-1/f attribute check failed"
+ $CHECKSTAT -t file $DIR/${tdir}-2/f ||
+ error "$CHECKSTAT $DIR/${tdir}-2/f attribute check failed"
+ rm -rf $DIR/${tdir}-*
}
run_test 53b "|X| open request while two MDC requests in flight"
test_53c() {
- cancel_lru_locks mdc # cleanup locks from former test cases
- rm -rf $DIR/${tdir}-1 $DIR/${tdir}-2
-
- mkdir -p $DIR/${tdir}-1
- mkdir -p $DIR/${tdir}-2
- multiop $DIR/${tdir}-1/f O_c &
- close_pid=$!
-
- #define OBD_FAIL_MDS_REINT_NET 0x107
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000107"
- mcreate $DIR/${tdir}-2/f &
- open_pid=$!
- sleep 1
-
- #define OBD_FAIL_MDS_CLOSE_NET 0x115
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000115"
- kill -USR1 $close_pid
- cancel_lru_locks mdc # force the close
-
- #bz20647: make sure all pids are exists before failover
- [ -d /proc/$close_pid ] || error "close_pid doesn't exist"
- [ -d /proc/$open_pid ] || error "open_pid doesn't exists"
- replay_barrier_nodf $SINGLEMDS
- fail_nodf $SINGLEMDS
- wait $open_pid || return 1
- sleep 2
- # close should be gone
- [ -d /proc/$close_pid ] && return 2
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
-
- $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3
- $CHECKSTAT -t file $DIR/${tdir}-2/f || return 4
- rm -rf $DIR/${tdir}-*
+ cancel_lru_locks mdc # cleanup locks from former test cases
+
+ mkdir $DIR/${tdir}-1 || error "mkdir $DIR/${tdir}-1 failed" # no -p and no prior rm -rf: an existing directory is now an error
+ mkdir $DIR/${tdir}-2 || error "mkdir $DIR/${tdir}-2 failed"
+ multiop $DIR/${tdir}-1/f O_c &
+ close_pid=$! # PID of the background multiop
+
+ #define OBD_FAIL_MDS_REINT_NET 0x107
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000107"
+ mcreate $DIR/${tdir}-2/f &
+ open_pid=$! # PID of the background mcreate
+ sleep 1
+
+ #define OBD_FAIL_MDS_CLOSE_NET 0x115
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000115" # replaces the 0x107 fail_loc set above
+ kill -USR1 $close_pid
+ cancel_lru_locks mdc # force the close
+
+ #bz20647: make sure all pids exist before failover
+ [ -d /proc/$close_pid ] || error "close_pid doesn't exist"
+ [ -d /proc/$open_pid ] || error "open_pid doesn't exist"
+ replay_barrier_nodf $SINGLEMDS
+ fail_nodf $SINGLEMDS
+ wait $open_pid || error "open_pid failed"
+ sleep 2
+ # close should be gone
+ [ -d /proc/$close_pid ] && error "close_pid should not exist"
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0"
+
+ $CHECKSTAT -t file $DIR/${tdir}-1/f ||
+ error "$CHECKSTAT $DIR/${tdir}-1/f attribute check failed"
+ $CHECKSTAT -t file $DIR/${tdir}-2/f ||
+ error "$CHECKSTAT $DIR/${tdir}-2/f attribute check failed"
+ rm -rf $DIR/${tdir}-*
}
run_test 53c "|X| open request and close request while two MDC requests in flight"
test_53d() {
- cancel_lru_locks mdc # cleanup locks from former test cases
- rm -rf $DIR/${tdir}-1 $DIR/${tdir}-2
-
- mkdir -p $DIR/${tdir}-1
- mkdir -p $DIR/${tdir}-2
- multiop $DIR/${tdir}-1/f O_c &
- close_pid=$!
- # give multiop a chance to open
- sleep 1
-
- #define OBD_FAIL_MDS_CLOSE_NET_REP 0x13b
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000013b"
- kill -USR1 $close_pid
- cancel_lru_locks mdc # force the close
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
- mcreate $DIR/${tdir}-2/f || return 1
-
- # close should still be here
- [ -d /proc/$close_pid ] || return 2
- fail $SINGLEMDS
- wait $close_pid || return 3
-
- $CHECKSTAT -t file $DIR/${tdir}-1/f || return 4
- $CHECKSTAT -t file $DIR/${tdir}-2/f || return 5
- rm -rf $DIR/${tdir}-*
-}
-run_test 53d "|X| close reply while two MDC requests in flight"
+ [[ $(lctl get_param mdc.*.import |
+ grep "connect_flags:.*multi_mod_rpc") ]] ||
+ { skip "Need MDC with 'multi_mod_rpcs' feature"; return 0; }
+
+ cancel_lru_locks mdc # cleanup locks from former test cases
+
+ mkdir $DIR/${tdir}-1 || error "mkdir $DIR/${tdir}-1 failed" # no -p and no prior rm -rf: an existing directory is now an error
+ mkdir $DIR/${tdir}-2 || error "mkdir $DIR/${tdir}-2 failed"
+ multiop $DIR/${tdir}-1/f O_c &
+ close_pid=$! # PID of the background multiop
+ # give multiop a chance to open
+ sleep 1
+
+ #define OBD_FAIL_MDS_CLOSE_NET_REP 0x13b
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000013b"
+ kill -USR1 $close_pid
+ cancel_lru_locks mdc # force the close
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0"
+ mcreate $DIR/${tdir}-2/f || error "mcreate $DIR/${tdir}-2/f failed"
+
+ # close should still be here
+ [ -d /proc/$close_pid ] || error "close_pid doesn't exist" # /proc/<pid> exists while the process is alive
+ fail $SINGLEMDS
+ wait $close_pid || error "close_pid failed"
+
+ $CHECKSTAT -t file $DIR/${tdir}-1/f ||
+ error "$CHECKSTAT $DIR/${tdir}-1/f attribute check failed"
+ $CHECKSTAT -t file $DIR/${tdir}-2/f ||
+ error "$CHECKSTAT $DIR/${tdir}-2/f attribute check failed"
+ rm -rf $DIR/${tdir}-*
+}
+run_test 53d "close reply while two MDC requests in flight"
test_53e() {
- cancel_lru_locks mdc # cleanup locks from former test cases
- rm -rf $DIR/${tdir}-1 $DIR/${tdir}-2
-
- mkdir -p $DIR/${tdir}-1
- mkdir -p $DIR/${tdir}-2
- multiop $DIR/${tdir}-1/f O_c &
- close_pid=$!
-
- #define OBD_FAIL_MDS_REINT_NET_REP 0x119
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x119"
- mcreate $DIR/${tdir}-2/f &
- open_pid=$!
- sleep 1
-
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
- kill -USR1 $close_pid
- cancel_lru_locks mdc # force the close
- wait $close_pid || return 1
- # open should still be here
- [ -d /proc/$open_pid ] || return 2
-
- replay_barrier_nodf $SINGLEMDS
- fail $SINGLEMDS
- wait $open_pid || return 3
-
- $CHECKSTAT -t file $DIR/${tdir}-1/f || return 4
- $CHECKSTAT -t file $DIR/${tdir}-2/f || return 5
- rm -rf $DIR/${tdir}-*
+ cancel_lru_locks mdc # cleanup locks from former test cases
+
+ mkdir $DIR/${tdir}-1 || error "mkdir $DIR/${tdir}-1 failed" # no -p and no prior rm -rf: an existing directory is now an error
+ mkdir $DIR/${tdir}-2 || error "mkdir $DIR/${tdir}-2 failed"
+ multiop $DIR/${tdir}-1/f O_c &
+ close_pid=$! # PID of the background multiop
+
+ #define OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x119" # set without the 0x80000000 bit, unlike the sibling tests
+ mcreate $DIR/${tdir}-2/f &
+ open_pid=$! # PID of the background mcreate
+ sleep 1
+
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0"
+ kill -USR1 $close_pid
+ cancel_lru_locks mdc # force the close
+ wait $close_pid || error "close_pid failed"
+ # open should still be here
+ [ -d /proc/$open_pid ] || error "open_pid doesn't exist"
+
+ replay_barrier_nodf $SINGLEMDS
+ fail $SINGLEMDS
+ wait $open_pid || error "open_pid failed"
+
+ $CHECKSTAT -t file $DIR/${tdir}-1/f ||
+ error "$CHECKSTAT $DIR/${tdir}-1/f attribute check failed"
+ $CHECKSTAT -t file $DIR/${tdir}-2/f ||
+ error "$CHECKSTAT $DIR/${tdir}-2/f attribute check failed"
+ rm -rf $DIR/${tdir}-*
}
run_test 53e "|X| open reply while two MDC requests in flight"
test_53f() {
- cancel_lru_locks mdc # cleanup locks from former test cases
- rm -rf $DIR/${tdir}-1 $DIR/${tdir}-2
-
- mkdir -p $DIR/${tdir}-1
- mkdir -p $DIR/${tdir}-2
- multiop $DIR/${tdir}-1/f O_c &
- close_pid=$!
-
- #define OBD_FAIL_MDS_REINT_NET_REP 0x119
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x119"
- mcreate $DIR/${tdir}-2/f &
- open_pid=$!
- sleep 1
-
- #define OBD_FAIL_MDS_CLOSE_NET_REP 0x13b
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000013b"
- kill -USR1 $close_pid
- cancel_lru_locks mdc # force the close
-
- #bz20647: make sure all pids are exists before failover
- [ -d /proc/$close_pid ] || error "close_pid doesn't exist"
- [ -d /proc/$open_pid ] || error "open_pid doesn't exists"
- replay_barrier_nodf $SINGLEMDS
- fail_nodf $SINGLEMDS
- wait $open_pid || return 1
- sleep 2
- # close should be gone
- [ -d /proc/$close_pid ] && return 2
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
-
- $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3
- $CHECKSTAT -t file $DIR/${tdir}-2/f || return 4
- rm -rf $DIR/${tdir}-*
+ cancel_lru_locks mdc # cleanup locks from former test cases
+
+ mkdir $DIR/${tdir}-1 || error "mkdir $DIR/${tdir}-1 failed"
+ mkdir $DIR/${tdir}-2 || error "mkdir $DIR/${tdir}-2 failed"
+ multiop $DIR/${tdir}-1/f O_c &
+ close_pid=$!
+
+ #define OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x119"
+ mcreate $DIR/${tdir}-2/f &
+ open_pid=$!
+ sleep 1
+
+ #define OBD_FAIL_MDS_CLOSE_NET_REP 0x13b
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000013b"
+ kill -USR1 $close_pid
+ cancel_lru_locks mdc # force the close
+
+ #bz20647: make sure all pids exist before failover
+ [ -d /proc/$close_pid ] || error "close_pid doesn't exist"
+ [ -d /proc/$open_pid ] || error "open_pid doesn't exists"
+ replay_barrier_nodf $SINGLEMDS
+ fail_nodf $SINGLEMDS
+ wait $open_pid || error "open_pid failed"
+ sleep 2
+ # close should be gone
+ [ -d /proc/$close_pid ] && error "close_pid should not exist"
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0"
+
+ $CHECKSTAT -t file $DIR/${tdir}-1/f ||
+ error "$CHECKSTAT $DIR/${tdir}-1/f attribute check failed"
+ $CHECKSTAT -t file $DIR/${tdir}-2/f ||
+ error "$CHECKSTAT $DIR/${tdir}-2/f attribute check failed"
+ rm -rf $DIR/${tdir}-*
}
run_test 53f "|X| open reply and close reply while two MDC requests in flight"
test_53g() {
- cancel_lru_locks mdc # cleanup locks from former test cases
- rm -rf $DIR/${tdir}-1 $DIR/${tdir}-2
-
- mkdir -p $DIR/${tdir}-1
- mkdir -p $DIR/${tdir}-2
- multiop $DIR/${tdir}-1/f O_c &
- close_pid=$!
-
- #define OBD_FAIL_MDS_REINT_NET_REP 0x119
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x119"
- mcreate $DIR/${tdir}-2/f &
- open_pid=$!
- sleep 1
-
- #define OBD_FAIL_MDS_CLOSE_NET 0x115
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000115"
- kill -USR1 $close_pid
- cancel_lru_locks mdc # force the close
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
-
- #bz20647: make sure all pids are exists before failover
- [ -d /proc/$close_pid ] || error "close_pid doesn't exist"
- [ -d /proc/$open_pid ] || error "open_pid doesn't exists"
- replay_barrier_nodf $SINGLEMDS
- fail_nodf $SINGLEMDS
- wait $open_pid || return 1
- sleep 2
- # close should be gone
- [ -d /proc/$close_pid ] && return 2
-
- $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3
- $CHECKSTAT -t file $DIR/${tdir}-2/f || return 4
- rm -rf $DIR/${tdir}-*
+ cancel_lru_locks mdc # cleanup locks from former test cases
+
+ mkdir $DIR/${tdir}-1 || error "mkdir $DIR/${tdir}-1 failed"
+ mkdir $DIR/${tdir}-2 || error "mkdir $DIR/${tdir}-2 failed"
+ multiop $DIR/${tdir}-1/f O_c &
+ close_pid=$!
+
+ #define OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x119"
+ mcreate $DIR/${tdir}-2/f &
+ open_pid=$!
+ sleep 1
+
+ #define OBD_FAIL_MDS_CLOSE_NET 0x115
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000115"
+ kill -USR1 $close_pid
+ cancel_lru_locks mdc # force the close
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0"
+
+ #bz20647: make sure all pids exist before failover
+ [ -d /proc/$close_pid ] || error "close_pid doesn't exist"
+ [ -d /proc/$open_pid ] || error "open_pid doesn't exists"
+ replay_barrier_nodf $SINGLEMDS
+ fail_nodf $SINGLEMDS
+ wait $open_pid || error "open_pid failed"
+ sleep 2
+ # close should be gone
+ [ -d /proc/$close_pid ] && error "close_pid should not exist"
+
+ $CHECKSTAT -t file $DIR/${tdir}-1/f ||
+ error "$CHECKSTAT $DIR/${tdir}-1/f attribute check failed"
+ $CHECKSTAT -t file $DIR/${tdir}-2/f ||
+ error "$CHECKSTAT $DIR/${tdir}-2/f attribute check failed"
+ rm -rf $DIR/${tdir}-*
}
run_test 53g "|X| drop open reply and close request while close and open are both in flight"
test_53h() {
- cancel_lru_locks mdc # cleanup locks from former test cases
- rm -rf $DIR/${tdir}-1 $DIR/${tdir}-2
-
- mkdir -p $DIR/${tdir}-1
- mkdir -p $DIR/${tdir}-2
- multiop $DIR/${tdir}-1/f O_c &
- close_pid=$!
-
- #define OBD_FAIL_MDS_REINT_NET 0x107
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000107"
- mcreate $DIR/${tdir}-2/f &
- open_pid=$!
- sleep 1
-
- #define OBD_FAIL_MDS_CLOSE_NET_REP 0x13b
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000013b"
- kill -USR1 $close_pid
- cancel_lru_locks mdc # force the close
- sleep 1
-
- #bz20647: make sure all pids are exists before failover
- [ -d /proc/$close_pid ] || error "close_pid doesn't exist"
- [ -d /proc/$open_pid ] || error "open_pid doesn't exists"
- replay_barrier_nodf $SINGLEMDS
- fail_nodf $SINGLEMDS
- wait $open_pid || return 1
- sleep 2
- # close should be gone
- [ -d /proc/$close_pid ] && return 2
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
-
- $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3
- $CHECKSTAT -t file $DIR/${tdir}-2/f || return 4
- rm -rf $DIR/${tdir}-*
-}
-run_test 53h "|X| open request and close reply while two MDC requests in flight"
-
-#b_cray 54 "|X| open request and close reply while two MDC requests in flight"
+ cancel_lru_locks mdc # cleanup locks from former test cases
+
+ mkdir $DIR/${tdir}-1 || error "mkdir $DIR/${tdir}-1 failed"
+ mkdir $DIR/${tdir}-2 || error "mkdir $DIR/${tdir}-2 failed"
+ multiop $DIR/${tdir}-1/f O_c &
+ close_pid=$!
+
+ #define OBD_FAIL_MDS_REINT_NET 0x107
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000107"
+ mcreate $DIR/${tdir}-2/f &
+ open_pid=$!
+ sleep 1
+
+ #define OBD_FAIL_MDS_CLOSE_NET_REP 0x13b
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000013b"
+ kill -USR1 $close_pid
+ cancel_lru_locks mdc # force the close
+ sleep 1
+
+ #bz20647: make sure all pids exist before failover
+ [ -d /proc/$close_pid ] || error "close_pid doesn't exist"
+ [ -d /proc/$open_pid ] || error "open_pid doesn't exists"
+ replay_barrier_nodf $SINGLEMDS
+ fail_nodf $SINGLEMDS
+ wait $open_pid || error "open_pid failed"
+ sleep 2
+ # close should be gone
+ [ -d /proc/$close_pid ] && error "close_pid should not exist"
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0"
+
+ $CHECKSTAT -t file $DIR/${tdir}-1/f ||
+ error "$CHECKSTAT $DIR/${tdir}-1/f attribute check failed"
+ $CHECKSTAT -t file $DIR/${tdir}-2/f ||
+ error "$CHECKSTAT $DIR/${tdir}-2/f attribute check failed"
+ rm -rf $DIR/${tdir}-*
+}
+run_test 53h "open request and close reply while two MDC requests in flight"
#b3761 ASSERTION(hash != 0) failed
test_55() {
#recovery one mds-ost setattr from llog
test_57() {
-#define OBD_FAIL_MDS_OST_SETATTR 0x12c
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000012c"
- touch $DIR/$tfile
- replay_barrier $SINGLEMDS
- fail $SINGLEMDS
- sleep 1
- $CHECKSTAT -t file $DIR/$tfile || return 1
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
- rm $DIR/$tfile
+ #define OBD_FAIL_MDS_OST_SETATTR 0x12c
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000012c"
+ touch $DIR/$tfile || error "touch $DIR/$tfile failed"
+ replay_barrier $SINGLEMDS
+ fail $SINGLEMDS
+ sleep 1
+ $CHECKSTAT -t file $DIR/$tfile ||
+ error "$CHECKSTAT $DIR/$tfile attribute check failed"
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
+ rm $DIR/$tfile
}
run_test 57 "test recovery from llog for setattr op"
+cleanup_58() {
+ zconf_umount $(hostname) $MOUNT2
+ trap - EXIT
+}
+
#recovery many mds-ost setattr from llog
test_58a() {
- mkdir -p $DIR/$tdir
-#define OBD_FAIL_MDS_OST_SETATTR 0x12c
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000012c"
- createmany -o $DIR/$tdir/$tfile-%d 2500
- replay_barrier $SINGLEMDS
- fail $SINGLEMDS
- sleep 2
- $CHECKSTAT -t file $DIR/$tdir/$tfile-* >/dev/null || return 1
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
- unlinkmany $DIR/$tdir/$tfile-%d 2500
- rmdir $DIR/$tdir
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ #define OBD_FAIL_MDS_OST_SETATTR 0x12c
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000012c"
+ createmany -o $DIR/$tdir/$tfile-%d 2500
+ replay_barrier $SINGLEMDS
+ fail $SINGLEMDS
+ sleep 2
+ $CHECKSTAT -t file $DIR/$tdir/$tfile-* >/dev/null ||
+ error "$CHECKSTAT $DIR/$tfile-* attribute check failed"
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
+ unlinkmany $DIR/$tdir/$tfile-%d 2500 ||
+ error "unlinkmany $DIR/$tfile failed"
+ rmdir $DIR/$tdir
}
run_test 58a "test recovery from llog for setattr op (test llog_gen_rec)"
test_58b() {
- local orig
- local new
+ local orig
+ local new
- large_xattr_enabled &&
- orig="$(generate_string $(max_xattr_size))" || orig="bar"
+ trap cleanup_58 EXIT
- mount_client $MOUNT2
- mkdir -p $DIR/$tdir
- touch $DIR/$tdir/$tfile
- replay_barrier $SINGLEMDS
- setfattr -n trusted.foo -v $orig $DIR/$tdir/$tfile
- fail $SINGLEMDS
- new=$(get_xattr_value trusted.foo $MOUNT2/$tdir/$tfile)
- [[ "$new" = "$orig" ]] || return 1
- rm -f $DIR/$tdir/$tfile
- rmdir $DIR/$tdir
- zconf_umount `hostname` $MOUNT2
+ large_xattr_enabled &&
+ orig="$(generate_string $(max_xattr_size))" || orig="bar"
+ # Original extended attribute can be long. Print a small version of
+ # attribute if an error occurs
+ local sm_msg=$(printf "%.9s" $orig)
+
+ mount_client $MOUNT2 || error "mount_client on $MOUNT2 failed"
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ touch $DIR/$tdir/$tfile || error "touch $DIR/$tdir/$tfile failed"
+ replay_barrier $SINGLEMDS
+ setfattr -n trusted.foo -v $orig $DIR/$tdir/$tfile
+ fail $SINGLEMDS
+ new=$(get_xattr_value trusted.foo $MOUNT2/$tdir/$tfile)
+ [[ "$new" = "$orig" ]] ||
+ error "xattr set ($sm_msg...) differs from xattr get ($new)"
+ rm -f $DIR/$tdir/$tfile
+ rmdir $DIR/$tdir
+ cleanup_58
+ wait_clients_import_state ${CLIENTS:-$HOSTNAME} "mgs" FULL
}
run_test 58b "test replay of setxattr op"
test_58c() { # bug 16570
- local orig
- local orig1
- local new
-
- if large_xattr_enabled; then
- local xattr_size=$(max_xattr_size)
- orig="$(generate_string $((xattr_size / 2)))"
- orig1="$(generate_string $xattr_size)"
- else
- orig="bar"
- orig1="bar1"
- fi
-
- mount_client $MOUNT2
- mkdir -p $DIR/$tdir
- touch $DIR/$tdir/$tfile
- drop_request "setfattr -n trusted.foo -v $orig $DIR/$tdir/$tfile" ||
- return 1
- new=$(get_xattr_value trusted.foo $MOUNT2/$tdir/$tfile)
- [[ "$new" = "$orig" ]] || return 2
- drop_reint_reply "setfattr -n trusted.foo1 -v $orig1 $DIR/$tdir/$tfile" ||
- return 3
- new=$(get_xattr_value trusted.foo1 $MOUNT2/$tdir/$tfile)
- [[ "$new" = "$orig1" ]] || return 4
- rm -f $DIR/$tdir/$tfile
- rmdir $DIR/$tdir
- zconf_umount $HOSTNAME $MOUNT2
+ local orig
+ local orig1
+ local new
+
+ trap cleanup_58 EXIT
+
+ if large_xattr_enabled; then
+ local xattr_size=$(max_xattr_size)
+ orig="$(generate_string $((xattr_size / 2)))"
+ orig1="$(generate_string $xattr_size)"
+ else
+ orig="bar"
+ orig1="bar1"
+ fi
+
+ # PING_INTERVAL max(obd_timeout / 4, 1U)
+ sleep $((TIMEOUT / 4))
+
+ # Original extended attribute can be long. Print a small version of
+ # attribute if an error occurs
+ local sm_msg=$(printf "%.9s" $orig)
+ local sm_msg1=$(printf "%.9s" $orig1)
+
+ mount_client $MOUNT2 || error "mount_client on $MOUNT2 failed"
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ touch $DIR/$tdir/$tfile || error "touch $DIR/$tdir/$tfile failed"
+ drop_request "setfattr -n trusted.foo -v $orig $DIR/$tdir/$tfile" ||
+ error "drop_request for setfattr failed"
+ new=$(get_xattr_value trusted.foo $MOUNT2/$tdir/$tfile)
+ [[ "$new" = "$orig" ]] ||
+ error "xattr set ($sm_msg...) differs from xattr get ($new)"
+ drop_reint_reply "setfattr -n trusted.foo1 \
+ -v $orig1 $DIR/$tdir/$tfile" ||
+ error "drop_reint_reply for setfattr failed"
+ new=$(get_xattr_value trusted.foo1 $MOUNT2/$tdir/$tfile)
+ [[ "$new" = "$orig1" ]] ||
+ error "second xattr set ($sm_msg1...) differs xattr get ($new)"
+ rm -f $DIR/$tdir/$tfile
+ rmdir $DIR/$tdir
+ cleanup_58
}
run_test 58c "resend/reconstruct setxattr op"
# log_commit_thread vs filter_destroy race used to lead to import use after free
# bug 11658
test_59() {
- remote_ost_nodsh && skip "remote OST with nodsh" && return 0
-
- mkdir -p $DIR/$tdir
- createmany -o $DIR/$tdir/$tfile-%d 200
- sync
- unlinkmany $DIR/$tdir/$tfile-%d 200
-#define OBD_FAIL_PTLRPC_DELAY_RECOV 0x507
- do_facet ost1 "lctl set_param fail_loc=0x507"
- fail ost1
- fail $SINGLEMDS
- do_facet ost1 "lctl set_param fail_loc=0x0"
- sleep 20
- rmdir $DIR/$tdir
+ remote_ost_nodsh && skip "remote OST with nodsh" && return 0
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ createmany -o $DIR/$tdir/$tfile-%d 200 ||
+ error "createmany create files failed"
+ sync
+ unlinkmany $DIR/$tdir/$tfile-%d 200 ||
+ error "unlinkmany $DIR/$tdir/$tfile failed"
+ #define OBD_FAIL_PTLRPC_DELAY_RECOV 0x507
+ do_facet ost1 "lctl set_param fail_loc=0x507"
+ fail ost1
+ fail $SINGLEMDS
+ do_facet ost1 "lctl set_param fail_loc=0x0"
+ sleep 20
+ rmdir $DIR/$tdir
}
run_test 59 "test log_commit_thread vs filter_destroy race"
# race between add unlink llog vs cat log init in post_recovery (only for b1_6)
# bug 12086: should no oops and No ctxt error for this test
test_60() {
- mkdir -p $DIR/$tdir
- createmany -o $DIR/$tdir/$tfile-%d 200
- replay_barrier $SINGLEMDS
- unlinkmany $DIR/$tdir/$tfile-%d 0 100
- fail $SINGLEMDS
- unlinkmany $DIR/$tdir/$tfile-%d 100 100
- local no_ctxt=`dmesg | grep "No ctxt"`
- [ -z "$no_ctxt" ] || error "ctxt is not initialized in recovery"
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ createmany -o $DIR/$tdir/$tfile-%d 200 ||
+ error "createmany create files failed"
+ replay_barrier $SINGLEMDS
+ unlinkmany $DIR/$tdir/$tfile-%d 0 100
+ fail $SINGLEMDS
+ unlinkmany $DIR/$tdir/$tfile-%d 100 100
+ local no_ctxt=$(dmesg | grep "No ctxt")
+ [ -z "$no_ctxt" ] || error "ctxt is not initialized in recovery"
}
run_test 60 "test llog post recovery init vs llog unlink"
#test race llog recovery thread vs llog cleanup
test_61a() { # was test_61
- remote_ost_nodsh && skip "remote OST with nodsh" && return 0
-
- mkdir -p $DIR/$tdir
- createmany -o $DIR/$tdir/$tfile-%d 800
- replay_barrier ost1
-# OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221
- unlinkmany $DIR/$tdir/$tfile-%d 800
- set_nodes_failloc "$(osts_nodes)" 0x80000221
- facet_failover ost1
- sleep 10
- fail ost1
- sleep 30
- set_nodes_failloc "$(osts_nodes)" 0x0
-
- $CHECKSTAT -t file $DIR/$tdir/$tfile-* && return 1
- rmdir $DIR/$tdir
+ remote_ost_nodsh && skip "remote OST with nodsh" && return 0
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ createmany -o $DIR/$tdir/$tfile-%d 800 ||
+ error "createmany create files failed"
+ replay_barrier ost1
+ unlinkmany $DIR/$tdir/$tfile-%d 800
+ # OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221
+ set_nodes_failloc "$(osts_nodes)" 0x80000221
+ facet_failover ost1
+ sleep 10
+ fail ost1
+ sleep 30
+ set_nodes_failloc "$(osts_nodes)" 0x0
+
+ $CHECKSTAT -t file $DIR/$tdir/$tfile-* &&
+ error "$CHECKSTAT $DIR/$tdir/$tfile attribute check should fail"
+ rmdir $DIR/$tdir
}
run_test 61a "test race llog recovery vs llog cleanup"
#test race mds llog sync vs llog cleanup
test_61b() {
-# OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT 0x13a
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000013a"
- facet_failover $SINGLEMDS
- sleep 10
- fail $SINGLEMDS
- do_facet client dd if=/dev/zero of=$DIR/$tfile bs=4k count=1 || return 1
+ # OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT 0x13a
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000013a"
+ facet_failover $SINGLEMDS
+ sleep 10
+ fail $SINGLEMDS
+ do_facet client dd if=/dev/zero of=$DIR/$tfile bs=4k count=1 ||
+ error "dd failed"
}
run_test 61b "test race mds llog sync vs llog cleanup"
#test race cancel cookie cb vs llog cleanup
test_61c() {
- remote_ost_nodsh && skip "remote OST with nodsh" && return 0
-
-# OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222
- touch $DIR/$tfile
- set_nodes_failloc "$(osts_nodes)" 0x80000222
- rm $DIR/$tfile
- sleep 10
- fail ost1
- set_nodes_failloc "$(osts_nodes)" 0x0
+ remote_ost_nodsh && skip "remote OST with nodsh" && return 0
+
+ # OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222
+ touch $DIR/$tfile || error "touch $DIR/$tfile failed"
+ set_nodes_failloc "$(osts_nodes)" 0x80000222
+ rm $DIR/$tfile
+ sleep 10
+ fail ost1
+ set_nodes_failloc "$(osts_nodes)" 0x0
}
run_test 61c "test race mds llog sync vs llog cleanup"
# OBD_FAIL_OBD_LLOG_SETUP 0x605
stop mgs
do_facet mgs "lctl set_param fail_loc=0x80000605"
- start mgs $MGSDEV $MGS_MOUNT_OPTS && error "mgs start should have failed"
+ start mgs $(mgsdevname) $MGS_MOUNT_OPTS &&
+ error "mgs start should have failed"
do_facet mgs "lctl set_param fail_loc=0"
- start mgs $MGSDEV $MGS_MOUNT_OPTS || error "cannot restart mgs"
+ start mgs $(mgsdevname) $MGS_MOUNT_OPTS || error "cannot restart mgs"
}
run_test 61d "error in llog_setup should cleanup the llog context correctly"
test_62() { # Bug 15756 - don't mis-drop resent replay
- mkdir -p $DIR/$tdir
- replay_barrier $SINGLEMDS
- createmany -o $DIR/$tdir/$tfile- 25
-#define OBD_FAIL_TGT_REPLAY_DROP 0x707
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000707"
- fail $SINGLEMDS
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
- unlinkmany $DIR/$tdir/$tfile- 25 || return 2
- return 0
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ replay_barrier $SINGLEMDS
+ createmany -o $DIR/$tdir/$tfile- 25 ||
+ error "createmany create files failed"
+ #define OBD_FAIL_TGT_REPLAY_DROP 0x707
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000707"
+ fail $SINGLEMDS
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0"
+ unlinkmany $DIR/$tdir/$tfile- 25 ||
+ error "unlinkmany $DIR/$tdir/$tfile failed"
+ return 0
}
run_test 62 "don't mis-drop resent replay"
do_facet ost1 "lctl set_param at_history=$at_history" || true
fi
- if [ $AT_MAX_SET -ne 0 ]; then
- for facet in mds client ost; do
- var=AT_MAX_SAVE_${facet}
- echo restore AT on $facet to saved value ${!var}
- at_max_set ${!var} $facet
- at_new=$(at_max_get $facet)
- echo Restored AT value on $facet $at_new
- [ $at_new -eq ${!var} ] || \
- error "$facet : AT value was not restored SAVED ${!var} NEW $at_new"
- done
- fi
+ if [ $AT_MAX_SET -ne 0 ]; then
+ for facet in mds client ost; do
+ var=AT_MAX_SAVE_${facet}
+ echo restore AT on $facet to saved value ${!var}
+ at_max_set ${!var} $facet
+ at_new=$(at_max_get $facet)
+ echo Restored AT value on $facet $at_new
+ [ $at_new -eq ${!var} ] ||
+ error "AT value not restored SAVED ${!var} NEW $at_new"
+ done
+ fi
}
at_start()
test_66b() #bug 3055
{
- remote_ost_nodsh && skip "remote OST with nodsh" && return 0
-
- at_start || return 0
- ORIG=$(lctl get_param -n mdc.${FSNAME}-*.timeouts | awk '/network/ {print $4}')
- $LCTL set_param fail_val=$(($ORIG + 5))
-#define OBD_FAIL_PTLRPC_PAUSE_REP 0x50c
- $LCTL set_param fail_loc=0x50c
- ls $DIR/$tfile > /dev/null 2>&1
- $LCTL set_param fail_loc=0
- CUR=$(lctl get_param -n mdc.${FSNAME}-*.timeouts | awk '/network/ {print $4}')
- WORST=$(lctl get_param -n mdc.${FSNAME}-*.timeouts | awk '/network/ {print $6}')
- echo "network timeout orig $ORIG, cur $CUR, worst $WORST"
- [ $WORST -gt $ORIG ] || error "Worst $WORST should be worse than orig $ORIG"
+ remote_ost_nodsh && skip "remote OST with nodsh" && return 0
+
+ at_start || return 0
+ ORIG=$(lctl get_param -n mdc.${FSNAME}-MDT0000*.timeouts |
+ awk '/network/ {print $4}')
+ $LCTL set_param fail_val=$(($ORIG + 5))
+ #define OBD_FAIL_PTLRPC_PAUSE_REP 0x50c
+ $LCTL set_param fail_loc=0x50c
+ ls $DIR/$tfile > /dev/null 2>&1
+ $LCTL set_param fail_loc=0
+ CUR=$(lctl get_param -n mdc.${FSNAME}-MDT0000*.timeouts |
+ awk '/network/ {print $4}')
+ WORST=$(lctl get_param -n mdc.${FSNAME}-MDT0000*.timeouts |
+ awk '/network/ {print $6}')
+ echo "network timeout orig $ORIG, cur $CUR, worst $WORST"
+ [ $WORST -gt $ORIG ] ||
+ error "Worst $WORST should be worse than orig $ORIG"
}
run_test 66b "AT: verify net latency adjusts"
CONN2=$(lctl get_param -n osc.*.stats | awk '/_connect/ {total+=$2} END {print total}')
ATTEMPTS=$(($CONN2 - $CONN1))
echo "$ATTEMPTS osc reconnect attempts on gradual slow"
- [ $ATTEMPTS -gt 0 ] && error_ignore 13721 "AT should have prevented reconnect"
- return 0
+ [ $ATTEMPTS -gt 0 ] &&
+ error_ignore bz13721 "AT should have prevented reconnect"
+ return 0
}
run_test 67a "AT: verify slow request processing doesn't induce reconnects"
local next_id=$(do_facet $SINGLEMDS lctl get_param -n \
osc.$mdtosc.prealloc_next_id)
- mkdir -p $DIR/$tdir/${OST}
- $SETSTRIPE -i 0 -c 1 $DIR/$tdir/${OST} || error "$SETSTRIPE"
- echo "Creating to objid $last_id on ost $OST..."
+ mkdir -p $DIR/$tdir/${OST} || error "mkdir $DIR/$tdir/${OST} failed"
+ $SETSTRIPE -i 0 -c 1 $DIR/$tdir/${OST} || error "$SETSTRIPE failed"
+ echo "Creating to objid $last_id on ost $OST..."
#define OBD_FAIL_OST_PAUSE_CREATE 0x223
do_facet ost1 "$LCTL set_param fail_val=20000"
do_facet ost1 "$LCTL set_param fail_loc=0x80000223"
[ -z "$ldlm_enqueue_min_r" ] && skip "missing /sys/.../ldlm_enqueue_min in the ost1" && return 0
local ENQ_MIN=$(cat $ldlm_enqueue_min)
local ENQ_MIN_R=$(do_facet ost1 "cat $ldlm_enqueue_min_r")
- echo $TIMEOUT >> $ldlm_enqueue_min
- do_facet ost1 "echo $TIMEOUT >> $ldlm_enqueue_min_r"
+ echo $TIMEOUT >> $ldlm_enqueue_min
+ do_facet ost1 "echo $TIMEOUT >> $ldlm_enqueue_min_r"
- rm -rf $DIR/$tdir
- mkdir -p $DIR/$tdir
- $SETSTRIPE --stripe-index=0 --count=1 $DIR/$tdir
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ $SETSTRIPE --stripe-index=0 --count=1 $DIR/$tdir
#define OBD_FAIL_LDLM_PAUSE_CANCEL 0x312
$LCTL set_param fail_val=$(($TIMEOUT - 1))
$LCTL set_param fail_loc=0x80000312
at_cleanup
# end of AT tests includes above lines
-
# start multi-client tests
test_70a () {
- [ -z "$CLIENTS" ] && \
+ [ -z "$CLIENTS" ] &&
{ skip "Need two or more clients." && return; }
- [ $CLIENTCOUNT -lt 2 ] && \
+ [ $CLIENTCOUNT -lt 2 ] &&
{ skip "Need two or more clients, have $CLIENTCOUNT" && return; }
echo "mount clients $CLIENTS ..."
echo "Write/read files on $DIR ; clients $CLIENTS ... "
for CLIENT in $clients; do
do_node $CLIENT dd bs=1M count=10 if=/dev/zero \
- of=$DIR/${tfile}_${CLIENT} 2>/dev/null || \
+ of=$DIR/${tfile}_${CLIENT} 2>/dev/null ||
error "dd failed on $CLIENT"
done
local prev_client=$(echo $clients | sed 's/^.* \(.\+\)$/\1/')
for C in ${CLIENTS//,/ }; do
- do_node $prev_client dd if=$DIR/${tfile}_${C} of=/dev/null 2>/dev/null || \
+ do_node $prev_client dd if=$DIR/${tfile}_${C} \
+ of=/dev/null 2>/dev/null ||
error "dd if=$DIR/${tfile}_${C} failed on $prev_client"
prev_client=$C
done
-
+
ls $DIR
}
run_test 70a "check multi client t-f"
killall_process $clients "$prog" -0
}
-killall_process () {
- local clients=${1:-$(hostname)}
- local name=$2
- local signal=$3
- local rc=0
-
- do_nodes $clients "killall $signal $name"
-}
-
test_70b () {
local clients=${CLIENTS:-$HOSTNAME}
+ local mdscount=$MDSCOUNT
+
+ # until LU-6844 is fixed, run on one MDT instead of disabling test
+ mdscount=1
zconf_mount_clients $clients $MOUNT
local duration=300
- [ "$SLOW" = "no" ] && duration=60
+ [ "$SLOW" = "no" ] && duration=120
# set duration to 900 because it takes some time to boot node
[ "$FAILURE_MODE" = HARD ] && duration=900
+ local elapsed
+ local start_ts=$(date +%s)
local cmd="rundbench 1 -t $duration"
local pid=""
+ if [ $mdscount -ge 2 ]; then
+ test_mkdir -p -c$mdscount $DIR/$tdir
+ $LFS setdirstripe -D -c$mdscount $DIR/$tdir
+ fi
do_nodesv $clients "set -x; MISSING_DBENCH_OK=$MISSING_DBENCH_OK \
PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests/:$DBENCH_LIB \
DBENCH_LIB=$DBENCH_LIB TESTSUITE=$TESTSUITE TESTNAME=$TESTNAME \
MOUNT=$MOUNT DIR=$DIR/$tdir/\\\$(hostname) LCTL=$LCTL $cmd" &
pid=$!
+
+ #LU-1897 wait for all dbench copies to start
+ while ! check_for_process $clients dbench; do
+ elapsed=$(($(date +%s) - start_ts))
+ if [ $elapsed -gt $duration ]; then
+ killall_process $clients dbench
+ error "dbench failed to start on $clients!"
+ fi
+ sleep 1
+ done
+
log "Started rundbench load pid=$pid ..."
- # give rundbench a chance to start, bug 24118
- sleep 12
- local elapsed=0
+ elapsed=$(($(date +%s) - start_ts))
local num_failovers=0
- local start_ts=$(date +%s)
+ local fail_index=1
while [ $elapsed -lt $duration ]; do
if ! check_for_process $clients dbench; then
- error_noexit "dbench not found on some of $clients!"
+ error_noexit "dbench stopped on some of $clients!"
killall_process $clients dbench
break
fi
sleep 1
- replay_barrier $SINGLEMDS
+ replay_barrier mds$fail_index
sleep 1 # give clients a time to do operations
# Increment the number of failovers
num_failovers=$((num_failovers+1))
- log "$TESTNAME fail $SINGLEMDS $num_failovers times"
- fail $SINGLEMDS
+ log "$TESTNAME fail mds$fail_index $num_failovers times"
+ fail mds$fail_index
elapsed=$(($(date +%s) - start_ts))
+ if [ $fail_index -ge $mdscount ]; then
+ fail_index=1
+ else
+ fail_index=$((fail_index+1))
+ fi
done
wait $pid || error "rundbench load on $clients failed!"
}
-run_test 70b "mds recovery; $CLIENTCOUNT clients"
+run_test 70b "dbench ${MDSCOUNT}mdts recovery; $CLIENTCOUNT clients"
# end multi-client tests
-test_73a() {
- multiop_bg_pause $DIR/$tfile O_tSc || return 3
- pid=$!
- rm -f $DIR/$tfile
+random_fail_mdt() {
+ local max_index=$1
+ local duration=$2
+ local monitor_pid=$3
+ local elapsed
+ local start_ts=$(date +%s)
+ local num_failovers=0
+ local fail_index
- replay_barrier $SINGLEMDS
-#define OBD_FAIL_LDLM_ENQUEUE 0x302
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000302"
- fail $SINGLEMDS
- kill -USR1 $pid
- wait $pid || return 1
- [ -e $DIR/$tfile ] && return 2
- return 0
+ elapsed=$(($(date +%s) - start_ts))
+ while [ $elapsed -lt $duration ]; do
+ fail_index=$((RANDOM%max_index+1))
+ kill -0 $monitor_pid ||
+ error "$monitor_pid stopped"
+ sleep 120
+ replay_barrier mds$fail_index
+ sleep 10
+ # Increment the number of failovers
+ num_failovers=$((num_failovers+1))
+ log "$TESTNAME fail mds$fail_index $num_failovers times"
+ fail mds$fail_index
+ elapsed=$(($(date +%s) - start_ts))
+ done
}
-run_test 73a "open(O_CREAT), unlink, replay, reconnect before open replay , close"
-
-test_73b() {
- multiop_bg_pause $DIR/$tfile O_tSc || return 3
- pid=$!
- rm -f $DIR/$tfile
- replay_barrier $SINGLEMDS
-#define OBD_FAIL_LDLM_REPLY 0x30c
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000030c"
- fail $SINGLEMDS
- kill -USR1 $pid
- wait $pid || return 1
- [ -e $DIR/$tfile ] && return 2
- return 0
+cleanup_70c() {
+ trap 0
+ rm -f $DIR/replay-single.70c.lck
+ rm -rf /$DIR/$tdir
}
-run_test 73b "open(O_CREAT), unlink, replay, reconnect at open_replay reply, close"
-test_73c() {
- multiop_bg_pause $DIR/$tfile O_tSc || return 3
- pid=$!
- rm -f $DIR/$tfile
+test_70c () {
+ local clients=${CLIENTS:-$HOSTNAME}
+ local rc=0
- replay_barrier $SINGLEMDS
-#define OBD_FAIL_TGT_LAST_REPLAY 0x710
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000710"
- fail $SINGLEMDS
- kill -USR1 $pid
- wait $pid || return 1
- [ -e $DIR/$tfile ] && return 2
- return 0
-}
-run_test 73c "open(O_CREAT), unlink, replay, reconnect at last_replay, close"
+ zconf_mount_clients $clients $MOUNT
-# bug 18554
-test_74() {
- local clients=${CLIENTS:-$HOSTNAME}
-
- stop ost1
- zconf_umount_clients $clients $MOUNT
- facet_failover $SINGLEMDS
- zconf_mount_clients $clients $MOUNT
- mount_facet ost1
- touch $DIR/$tfile || return 1
- rm $DIR/$tfile || return 2
- clients_up || error "client evicted: $?"
- return 0
+ local duration=300
+ [ "$SLOW" = "no" ] && duration=180
+ # set duration to 600 because it takes some time to boot node
+ [ "$FAILURE_MODE" = HARD ] && duration=600
+
+ local elapsed
+ local start_ts=$(date +%s)
+
+ trap cleanup_70c EXIT
+ (
+ while [ ! -e $DIR/replay-single.70c.lck ]; do
+ test_mkdir -p -c$MDSCOUNT $DIR/$tdir || break
+ if [ $MDSCOUNT -ge 2 ]; then
+ $LFS setdirstripe -D -c$MDSCOUNT $DIR/$tdir ||
+ error "set default dirstripe failed"
+ fi
+ cd $DIR/$tdir || break
+ tar cf - /etc | tar xf - || error "tar failed in loop"
+ done
+ )&
+ tar_70c_pid=$!
+ echo "Started tar $tar_70c_pid"
+
+ random_fail_mdt $MDSCOUNT $duration $tar_70c_pid
+ kill -0 $tar_70c_pid || error "tar $tar_70c_pid stopped"
+
+ touch $DIR/replay-single.70c.lck
+ wait $tar_70c_pid || error "$?: tar failed"
+
+ cleanup_70c
+ true
}
-run_test 74 "Ensure applications don't fail waiting for OST recovery"
+run_test 70c "tar ${MDSCOUNT}mdts recovery"
-test_80a() {
- [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
-
- mkdir -p $DIR/$tdir
- replay_barrier mds2
- $CHECKSTAT -t dir $DIR/$tdir || error "$CHECKSTAT -t dir $DIR/$tdir failed"
- rmdir $DIR/$tdir || error "rmdir $DIR/$tdir failed"
- fail mds2
- stat $DIR/$tdir 2&>/dev/null && error "$DIR/$tdir still exist after recovery!"
- return 0
+cleanup_70d() {
+ trap 0
+ kill -9 $mkdir_70d_pid
}
-run_test 80a "CMD: unlink cross-node dir (fail mds with inode)"
-test_80b() {
- [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+test_70d () {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local clients=${CLIENTS:-$HOSTNAME}
+ local rc=0
- mkdir -p $DIR/$tdir
- replay_barrier $SINGLEMDS
- $CHECKSTAT -t dir $DIR/$tdir || error "$CHECKSTAT -t dir $DIR/$tdir failed"
- rmdir $DIR/$tdir || error "rmdir $DIR/$tdir failed"
- fail $SINGLEMDS
- stat $DIR/$tdir 2&>/dev/null && error "$DIR/$tdir still exist after recovery!"
- return 0
-}
-run_test 80b "CMD: unlink cross-node dir (fail mds with name)"
+ zconf_mount_clients $clients $MOUNT
-test_81a() {
- [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local duration=300
+ [ "$SLOW" = "no" ] && duration=180
+ # set duration to 900 because it takes some time to boot node
+ [ "$FAILURE_MODE" = HARD ] && duration=900
- mkdir -p $DIR/$tdir
- createmany -o $DIR/$tdir/f 3000 || error "createmany failed"
- sleep 10
- $CHECKSTAT -t dir $DIR/$tdir || error "$CHECKSTAT -t dir failed"
- $CHECKSTAT -t file $DIR/$tdir/f1002 || error "$CHECKSTAT -t file failed"
- replay_barrier $SINGLEMDS
- rm $DIR/$tdir/f1002 || error "rm $DIR/$tdir/f1002 failed"
- fail $SINGLEMDS
- stat $DIR/$tdir/f1002
-}
-run_test 81a "CMD: unlink cross-node file (fail mds with name)"
+ mkdir -p $DIR/$tdir
-test_82a() {
- [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local elapsed
+ local start_ts=$(date +%s)
- local dir=$DIR/d82a
- replay_barrier mds2
- mkdir $dir || error "mkdir $dir failed"
- log "FAILOVER mds2"
- fail mds2
- stat $DIR
- $CHECKSTAT -t dir $dir || error "$CHECKSTAT -t dir $dir failed"
+ trap cleanup_70d EXIT
+ (
+ while true; do
+ $LFS mkdir -i0 -c2 $DIR/$tdir/test || {
+ echo "mkdir fails"
+ break
+ }
+ $LFS mkdir -i1 -c2 $DIR/$tdir/test1 || {
+ echo "mkdir fails"
+ break
+ }
+
+ touch $DIR/$tdir/test/a || {
+ echo "touch fails"
+ break;
+ }
+ mkdir $DIR/$tdir/test/b || {
+ echo "mkdir fails"
+ break;
+ }
+ rm -rf $DIR/$tdir/test || {
+ echo "rmdir fails"
+ break
+ }
+
+ touch $DIR/$tdir/test1/a || {
+ echo "touch fails"
+ break;
+ }
+ mkdir $DIR/$tdir/test1/b || {
+ echo "mkdir fails"
+ break;
+ }
+
+ rm -rf $DIR/$tdir/test1 || {
+ echo "rmdir fails"
+ break
+ }
+ done
+ )&
+ mkdir_70d_pid=$!
+ echo "Started $mkdir_70d_pid"
+
+ random_fail_mdt $MDSCOUNT $duration $mkdir_70d_pid
+ kill -0 $mkdir_70d_pid || error "mkdir/rmdir $mkdir_70d_pid stopped"
+
+ cleanup_70d
+ true
}
-run_test 82a "CMD: mkdir cross-node dir (fail mds with inode)"
+run_test 70d "mkdir/rmdir striped dir ${MDSCOUNT}mdts recovery"
-test_82b() {
- [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
-
- local dir=$DIR/d82b
- replay_barrier $SINGLEMDS
- mkdir $dir || error "mkdir $dir failed"
- log "FAILOVER mds1"
- fail $SINGLEMDS
- stat $DIR
- $CHECKSTAT -t dir $dir || error "$CHECKSTAT -t dir $dir failed"
+cleanup_70e() {
+ trap 0
+ kill -9 $rename_70e_pid
}
-run_test 82b "CMD: mkdir cross-node dir (fail mds with name)"
-test_83a() {
- mkdir -p $DIR/$tdir
- createmany -o $DIR/$tdir/$tfile- 10 || return 1
-#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000140"
- unlinkmany $DIR/$tdir/$tfile- 10 || return 2
+# test_70e: keep renaming a file back and forth between two directories in a
+# background loop while random_fail_mdt runs for $duration seconds, then
+# verify the background loop is still alive.
+# Skipped when fewer than 2 MDTs are configured.
+test_70e () {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	local clients=${CLIENTS:-$HOSTNAME}
+	local rc=0
+
+	# NOTE(review): this overwrites the local lnet debug mask and it is
+	# not restored anywhere in this function - confirm it is intentional.
+	echo ha > /proc/sys/lnet/debug
+	zconf_mount_clients $clients $MOUNT
+
+	local duration=300
+	[ "$SLOW" = "no" ] && duration=180
+	# set duration to 900 because it takes some time to boot node
+	[ "$FAILURE_MODE" = HARD ] && duration=900
+
+	mkdir -p $DIR/$tdir
+	$LFS mkdir -i0 $DIR/$tdir/test_0
+	$LFS mkdir -i0 $DIR/$tdir/test_1
+	touch $DIR/$tdir/test_0/a
+	touch $DIR/$tdir/test_1/b
+	# make sure the background loop is killed on any exit path
+	trap cleanup_70e EXIT
+	(
+		# Each iteration: rename a over b, check a is gone and b is
+		# present, recreate a, then rename b back over a.  Any failure
+		# ends the loop, which the kill -0 check below then detects.
+		while true; do
+			mrename $DIR/$tdir/test_0/a $DIR/$tdir/test_1/b > \
+						/dev/null || {
+				echo "a->b fails"
+				break;
+			}
+
+			checkstat $DIR/$tdir/test_0/a && {
+				echo "a still exists"
+				break
+			}
+
+			# this branch runs when checkstat on b FAILS, i.e. the
+			# rename target is missing
+			checkstat $DIR/$tdir/test_1/b || {
+				echo "b does not exist"
+				break
+			}
+
+			touch $DIR/$tdir/test_0/a || {
+				echo "touch a fails"
+				break
+			}
+
+			# rename in the reverse direction: b -> a
+			mrename $DIR/$tdir/test_1/b $DIR/$tdir/test_0/a > \
+						/dev/null || {
+				echo "b->a fails"
+				break;
+			}
+		done
+	)&
+	rename_70e_pid=$!
+	echo "Started $rename_70e_pid"
+
+	random_fail_mdt 2 $duration $rename_70e_pid
+	# the rename loop only exits on failure, so it must still be running
+	kill -0 $rename_70e_pid || error "rename $rename_70e_pid stopped"
+
+	cleanup_70e
+	true
}
-run_test 83a "fail log_add during unlink recovery"
+run_test 70e "rename cross-MDT with random fails"
-test_83b() {
- mkdir -p $DIR/$tdir
- createmany -o $DIR/$tdir/$tfile- 10 || return 1
- replay_barrier $SINGLEMDS
- unlinkmany $DIR/$tdir/$tfile- 10 || return 2
-#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000140"
- fail $SINGLEMDS
+cleanup_71a() {
+ trap 0
+ kill -9 $mkdir_71a_pid
}
-run_test 83b "fail log_add during unlink recovery"
-test_84a() {
-#define OBD_FAIL_MDS_OPEN_WAIT_CREATE 0x144
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000144"
- createmany -o $DIR/$tfile- 1 &
- PID=$!
- mds_evict_client
- wait $PID
- client_up || client_up || true # reconnect
+random_double_fail_mdt() {
+ local max_index=$1
+ local duration=$2
+ local monitor_pid=$3
+ local elapsed
+ local start_ts=$(date +%s)
+ local num_failovers=0
+ local fail_index
+ local second_index
+
+ elapsed=$(($(date +%s) - start_ts))
+ while [ $elapsed -lt $duration ]; do
+ fail_index=$((RANDOM%max_index + 1))
+ if [ $fail_index -eq $max_index ]; then
+ second_index=1
+ else
+ second_index=$((fail_index + 1))
+ fi
+ kill -0 $monitor_pid ||
+ error "$monitor_pid stopped"
+ sleep 120
+ replay_barrier mds$fail_index
+ replay_barrier mds$second_index
+ sleep 10
+ # Increment the number of failovers
+ num_failovers=$((num_failovers+1))
+ log "fail mds$fail_index mds$second_index $num_failovers times"
+ fail mds${fail_index},mds${second_index}
+ elapsed=$(($(date +%s) - start_ts))
+ done
}
-run_test 84a "stale open during export disconnect"
-test_85a() { #bug 16774
- lctl set_param -n ldlm.cancel_unused_locks_before_replay "1"
+test_71a () {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local clients=${CLIENTS:-$HOSTNAME}
+ local rc=0
- for i in `seq 100`; do
- echo "tag-$i" > $DIR/$tfile-$i
- grep -q "tag-$i" $DIR/$tfile-$i || error "f2-$i"
- done
+ zconf_mount_clients $clients $MOUNT
- lov_id=`lctl dl | grep "clilov"`
- addr=`echo $lov_id | awk '{print $4}' | awk -F '-' '{print $3}'`
- count=`lctl get_param -n ldlm.namespaces.*MDT0000*$addr.lock_unused_count`
- echo "before recovery: unused locks count = $count"
+ local duration=300
+ [ "$SLOW" = "no" ] && duration=180
+ # set duration to 900 because it takes some time to boot node
+ [ "$FAILURE_MODE" = HARD ] && duration=900
- fail $SINGLEMDS
+ mkdir -p $DIR/$tdir
- count2=`lctl get_param -n ldlm.namespaces.*MDT0000*$addr.lock_unused_count`
- echo "after recovery: unused locks count = $count2"
+ local elapsed
+ local start_ts=$(date +%s)
- if [ $count2 -ge $count ]; then
- error "unused locks are not canceled"
- fi
+ trap cleanup_71a EXIT
+ (
+ while true; do
+ $LFS mkdir -i0 -c2 $DIR/$tdir/test
+ rmdir $DIR/$tdir/test
+ done
+ )&
+ mkdir_71a_pid=$!
+ echo "Started $mkdir_71a_pid"
+
+ random_double_fail_mdt 2 $duration $mkdir_71a_pid
+ kill -0 $mkdir_71a_pid || error "mkdir/rmdir $mkdir_71a_pid stopped"
+
+ cleanup_71a
+ true
}
-run_test 85a "check the cancellation of unused locks during recovery(IBITS)"
+run_test 71a "mkdir/rmdir striped dir with 2 mdts recovery"
-test_85b() { #bug 16774
- lctl set_param -n ldlm.cancel_unused_locks_before_replay "1"
+test_73a() {
+ multiop_bg_pause $DIR/$tfile O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$!
+ rm -f $DIR/$tfile
- do_facet mgs $LCTL pool_new $FSNAME.$TESTNAME || return 1
- do_facet mgs $LCTL pool_add $FSNAME.$TESTNAME $FSNAME-OST0000 || return 2
+ replay_barrier $SINGLEMDS
+ #define OBD_FAIL_LDLM_ENQUEUE_NET 0x302
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000302"
+ fail $SINGLEMDS
+ kill -USR1 $pid
+ wait $pid || error "multiop pid failed"
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist"
+ return 0
+}
+run_test 73a "open(O_CREAT), unlink, replay, reconnect before open replay, close"
- $SETSTRIPE -c 1 -p $FSNAME.$TESTNAME $DIR
+test_73b() {
+ multiop_bg_pause $DIR/$tfile O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$!
+ rm -f $DIR/$tfile
- for i in `seq 100`; do
- dd if=/dev/urandom of=$DIR/$tfile-$i bs=4096 count=32 >/dev/null 2>&1
- done
+ replay_barrier $SINGLEMDS
+ #define OBD_FAIL_LDLM_REPLY 0x30c
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000030c"
+ fail $SINGLEMDS
+ kill -USR1 $pid
+ wait $pid || error "multiop pid failed"
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist"
+ return 0
+}
+run_test 73b "open(O_CREAT), unlink, replay, reconnect at open_replay reply, close"
- cancel_lru_locks osc
+# bug 18554
+test_74() {
+ local clients=${CLIENTS:-$HOSTNAME}
- for i in `seq 100`; do
- dd if=$DIR/$tfile-$i of=/dev/null bs=4096 count=32 >/dev/null 2>&1
- done
+ zconf_umount_clients $clients $MOUNT
+ stop ost1
+ facet_failover $SINGLEMDS
+ zconf_mount_clients $clients $MOUNT
+ mount_facet ost1
+ touch $DIR/$tfile || error "touch $DIR/$tfile failed"
+ rm $DIR/$tfile || error "rm $DIR/$tfile failed"
+ clients_up || error "client evicted: $?"
+ return 0
+}
+run_test 74 "Ensure applications don't fail waiting for OST recovery"
- lov_id=`lctl dl | grep "clilov"`
- addr=`echo $lov_id | awk '{print $4}' | awk -F '-' '{print $3}'`
- count=`lctl get_param -n ldlm.namespaces.*OST0000*$addr.lock_unused_count`
- echo "before recovery: unused locks count = $count"
- [ $count != 0 ] || return 3
+remote_dir_check_80() {
+ local MDTIDX=1
+ local diridx
+ local fileidx
- fail ost1
+ diridx=$($GETSTRIPE -M $remote_dir) ||
+ error "$GETSTRIPE -M $remote_dir failed"
+ [ $diridx -eq $MDTIDX ] || error "$diridx != $MDTIDX"
- count2=`lctl get_param -n ldlm.namespaces.*OST0000*$addr.lock_unused_count`
- echo "after recovery: unused locks count = $count2"
+ createmany -o $remote_dir/f-%d 20 || error "creation failed"
+ fileidx=$($GETSTRIPE -M $remote_dir/f-1) ||
+ error "$GETSTRIPE -M $remote_dir/f-1 failed"
+ [ $fileidx -eq $MDTIDX ] || error "$fileidx != $MDTIDX"
- do_facet mgs $LCTL pool_remove $FSNAME.$TESTNAME $FSNAME-OST0000 || return 4
- do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME || return 5
+ return 0
+}
- if [ $count2 -ge $count ]; then
- error "unused locks are not canceled"
- fi
+test_80a() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir -p $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ #define OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ replay_barrier mds1
+ fail mds${MDTIDX}
+
+ wait $CLIENT_PID || error "remote creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80a "DNE: create remote dir, drop update rep from MDT0, fail MDT0"
+
+test_80b() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ replay_barrier mds1
+ replay_barrier mds2
+ fail mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "remote creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80b "DNE: create remote dir, drop update rep from MDT0, fail MDT1"
+
+test_80c() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ replay_barrier mds1
+ replay_barrier mds2
+ fail mds${MDTIDX}
+ fail mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "remote creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80c "DNE: create remote dir, drop update rep from MDT1, fail MDT[0,1]"
+
+test_80d() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ # sleep 3 seconds to make sure MDTs are failed after
+ # lfs mkdir -i has finished on all of MDTs.
+ sleep 3
+
+ replay_barrier mds1
+ replay_barrier mds2
+ fail mds${MDTIDX},mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "remote creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80d "DNE: create remote dir, drop update rep from MDT1, fail 2 MDTs"
+
+test_80e() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ # OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ # sleep 3 seconds to make sure MDTs are failed after
+ # lfs mkdir -i has finished on all of MDTs.
+ sleep 3
+
+ replay_barrier mds1
+ fail mds${MDTIDX}
+
+ wait $CLIENT_PID || error "remote creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80e "DNE: create remote dir, drop MDT1 rep, fail MDT0"
+
+test_80f() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ # OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ replay_barrier mds2
+ fail mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "remote creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80f "DNE: create remote dir, drop MDT1 rep, fail MDT1"
+
+test_80g() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ # OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ # sleep 3 seconds to make sure MDTs are failed after
+ # lfs mkdir -i has finished on all of MDTs.
+ sleep 3
+
+ replay_barrier mds1
+ replay_barrier mds2
+ fail mds${MDTIDX}
+ fail mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "remote creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80g "DNE: create remote dir, drop MDT1 rep, fail MDT0, then MDT1"
+
+test_80h() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ # OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ # sleep 3 seconds to make sure MDTs are failed after
+ # lfs mkdir -i has finished on all of MDTs.
+ sleep 3
+
+ replay_barrier mds1
+ replay_barrier mds2
+ fail mds${MDTIDX},mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "remote dir creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80h "DNE: create remote dir, drop MDT1 rep, fail 2 MDTs"
+
+# test_81a: remove a remote directory (created on MDT index 1) in the
+# background with fail_loc 0x1701 set on mds1, fail mds2, then check that the
+# rmdir succeeded and the directory is really gone.
+# Skipped with fewer than 2 MDTs, or in HARD failure mode when mds1 and mds2
+# share a host.
+test_81a() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	local MDTIDX=1
+	local remote_dir=$DIR/$tdir/remote_dir
+
+	mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+	touch $remote_dir || error "touch $remote_dir failed"
+	# OBD_FAIL_OBJ_UPDATE_NET_REP 0x1701
+	do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+	rmdir $remote_dir &
+	local CLIENT_PID=$!
+
+	replay_barrier mds2
+	fail mds$((MDTIDX + 1))
+
+	wait $CLIENT_PID || error "rm remote dir failed"
+
+	# "&>" takes no fd prefix: with "2&>" the 2 is handed to stat as an
+	# extra file operand, stat then fails even if $remote_dir exists and
+	# this check could never trigger.
+	stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+
+	return 0
+}
+run_test 81a "DNE: unlink remote dir, drop MDT0 update rep, fail MDT1"
+
+# test_81b: remove a remote directory (created on MDT index 1) in the
+# background with fail_loc 0x1701 set on mds1, fail mds1, then check that the
+# rmdir succeeded and the directory is really gone.
+# Skipped with fewer than 2 MDTs, or in HARD failure mode when mds1 and mds2
+# share a host.
+test_81b() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+	local MDTIDX=1
+	local remote_dir=$DIR/$tdir/remote_dir
+
+	mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+	# OBD_FAIL_OBJ_UPDATE_NET_REP 0x1701
+	do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+	rmdir $remote_dir &
+	local CLIENT_PID=$!
+
+	replay_barrier mds1
+	fail mds${MDTIDX}
+
+	wait $CLIENT_PID || error "rm remote dir failed"
+
+	# "&>" takes no fd prefix: with "2&>" the 2 is handed to stat as an
+	# extra file operand, stat then fails even if $remote_dir exists and
+	# this check could never trigger.
+	stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+
+	return 0
+}
+run_test 81b "DNE: unlink remote dir, drop MDT0 update reply, fail MDT0"
+
+# test_81c: remove a remote directory (created on MDT index 1) in the
+# background with fail_loc 0x1701 set on mds1, fail mds1 and then mds2 one
+# after the other, then check that the rmdir succeeded and the directory is
+# really gone.
+# Skipped with fewer than 2 MDTs, or in HARD failure mode when mds1 and mds2
+# share a host.
+test_81c() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	local MDTIDX=1
+	local remote_dir=$DIR/$tdir/remote_dir
+
+	mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+	# OBD_FAIL_OBJ_UPDATE_NET_REP 0x1701
+	do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+	rmdir $remote_dir &
+	local CLIENT_PID=$!
+
+	replay_barrier mds1
+	replay_barrier mds2
+	fail mds${MDTIDX}
+	fail mds$((MDTIDX + 1))
+
+	wait $CLIENT_PID || error "rm remote dir failed"
+
+	# "&>" takes no fd prefix: with "2&>" the 2 is handed to stat as an
+	# extra file operand, stat then fails even if $remote_dir exists and
+	# this check could never trigger.
+	stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+
+	return 0
+}
+run_test 81c "DNE: unlink remote dir, drop MDT0 update reply, fail MDT0,MDT1"
+
+# test_81d: remove a remote directory (created on MDT index 1) in the
+# background with fail_loc 0x1701 set on mds1, fail mds1 and mds2 in a single
+# fail call, then check that the rmdir succeeded and the directory is really
+# gone.
+# Skipped with fewer than 2 MDTs.
+test_81d() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	local MDTIDX=1
+	local remote_dir=$DIR/$tdir/remote_dir
+
+	mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+	# OBD_FAIL_OBJ_UPDATE_NET_REP 0x1701
+	do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+	rmdir $remote_dir &
+	local CLIENT_PID=$!
+
+	replay_barrier mds1
+	replay_barrier mds2
+	fail mds${MDTIDX},mds$((MDTIDX + 1))
+
+	wait $CLIENT_PID || error "rm remote dir failed"
+
+	# "&>" takes no fd prefix: with "2&>" the 2 is handed to stat as an
+	# extra file operand, stat then fails even if $remote_dir exists and
+	# this check could never trigger.
+	stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+
+	return 0
+}
+run_test 81d "DNE: unlink remote dir, drop MDT0 update reply, fail 2 MDTs"
+
+# test_81e: remove a remote directory (created on MDT index 1) in the
+# background with fail_loc 0x119 set on mds2 (and cleared again right after
+# the rmdir is started), fail mds1, then check that the rmdir succeeded and
+# the directory is really gone.
+# Skipped with fewer than 2 MDTs, or in HARD failure mode when mds1 and mds2
+# share a host.
+test_81e() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	local MDTIDX=1
+	local remote_dir=$DIR/$tdir/remote_dir
+
+	mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+	# OBD_FAIL_MDS_REINT_NET_REP 0x119
+	do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+	rmdir $remote_dir &
+	local CLIENT_PID=$!
+	do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
+
+	replay_barrier mds1
+	fail mds${MDTIDX}
+
+	wait $CLIENT_PID || error "rm remote dir failed"
+
+	# "&>" takes no fd prefix: with "2&>" the 2 is handed to stat as an
+	# extra file operand, stat then fails even if $remote_dir exists and
+	# this check could never trigger.
+	stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+
+	return 0
+}
+run_test 81e "DNE: unlink remote dir, drop MDT1 req reply, fail MDT0"
+
+# test_81f: remove a remote directory (created on MDT index 1) in the
+# background with fail_loc 0x119 set on mds2, fail mds2, then check that the
+# rmdir succeeded and the directory is really gone.
+# Skipped with fewer than 2 MDTs, or in HARD failure mode when mds1 and mds2
+# share a host.
+test_81f() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	local MDTIDX=1
+	local remote_dir=$DIR/$tdir/remote_dir
+
+	mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+	# OBD_FAIL_MDS_REINT_NET_REP 0x119
+	do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+	rmdir $remote_dir &
+	local CLIENT_PID=$!
+
+	replay_barrier mds2
+	fail mds$((MDTIDX + 1))
+
+	wait $CLIENT_PID || error "rm remote dir failed"
+
+	# "&>" takes no fd prefix: with "2&>" the 2 is handed to stat as an
+	# extra file operand, stat then fails even if $remote_dir exists and
+	# this check could never trigger.
+	stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+
+	return 0
+}
+run_test 81f "DNE: unlink remote dir, drop MDT1 req reply, fail MDT1"
+
+# test_81g: remove a remote directory (created on MDT index 1) in the
+# background with fail_loc 0x119 set on mds2, fail mds1 and then mds2 one
+# after the other, then check that the rmdir succeeded and the directory is
+# really gone.
+# Skipped with fewer than 2 MDTs, or in HARD failure mode when mds1 and mds2
+# share a host.
+test_81g() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	local MDTIDX=1
+	local remote_dir=$DIR/$tdir/remote_dir
+
+	mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+	# OBD_FAIL_MDS_REINT_NET_REP 0x119
+	do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+	rmdir $remote_dir &
+	local CLIENT_PID=$!
+
+	replay_barrier mds1
+	replay_barrier mds2
+	fail mds${MDTIDX}
+	fail mds$((MDTIDX + 1))
+
+	wait $CLIENT_PID || error "rm remote dir failed"
+
+	# "&>" takes no fd prefix: with "2&>" the 2 is handed to stat as an
+	# extra file operand, stat then fails even if $remote_dir exists and
+	# this check could never trigger.
+	stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+
+	return 0
+}
+run_test 81g "DNE: unlink remote dir, drop req reply, fail M0, then M1"
+
+# test_81h: remove a remote directory (created on MDT index 1) in the
+# background with fail_loc 0x119 set on mds2, fail mds1 and mds2 in a single
+# fail call, then check that the rmdir succeeded and the directory is really
+# gone.
+# Skipped with fewer than 2 MDTs.
+test_81h() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	local MDTIDX=1
+	local remote_dir=$DIR/$tdir/remote_dir
+
+	mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+	# OBD_FAIL_MDS_REINT_NET_REP 0x119
+	do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+	rmdir $remote_dir &
+	local CLIENT_PID=$!
+
+	replay_barrier mds1
+	replay_barrier mds2
+	fail mds${MDTIDX},mds$((MDTIDX + 1))
+
+	wait $CLIENT_PID || error "rm remote dir failed"
+
+	# "&>" takes no fd prefix: with "2&>" the 2 is handed to stat as an
+	# extra file operand, stat then fails even if $remote_dir exists and
+	# this check could never trigger.
+	stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+
+	return 0
+}
+run_test 81h "DNE: unlink remote dir, drop request reply, fail 2 MDTs"
+
+test_84a() {
+#define OBD_FAIL_MDS_OPEN_WAIT_CREATE 0x144
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000144"
+ createmany -o $DIR/$tfile- 1 &
+ PID=$!
+ mds_evict_client
+ wait $PID
+ client_up || client_up || true # reconnect
+}
+run_test 84a "stale open during export disconnect"
+
+test_85a() { #bug 16774
+ lctl set_param -n ldlm.cancel_unused_locks_before_replay "1"
+
+ for i in $(seq 100); do
+ echo "tag-$i" > $DIR/$tfile-$i
+ grep -q "tag-$i" $DIR/$tfile-$i || error "f2-$i"
+ done
+
+ lov_id=$(lctl dl | grep "clilov")
+ addr=$(echo $lov_id | awk '{print $4}' | awk -F '-' '{print $3}')
+ count=$(lctl get_param -n \
+ ldlm.namespaces.*MDT0000*$addr.lock_unused_count)
+ echo "before recovery: unused locks count = $count"
+
+ fail $SINGLEMDS
+
+ count2=$(lctl get_param -n \
+ ldlm.namespaces.*MDT0000*$addr.lock_unused_count)
+ echo "after recovery: unused locks count = $count2"
+
+ if [ $count2 -ge $count ]; then
+ error "unused locks are not canceled"
+ fi
+}
+run_test 85a "check the cancellation of unused locks during recovery(IBITS)"
+
+# test_85b: with ldlm.cancel_unused_locks_before_replay set to 1, write and
+# re-read 100 files in a pool containing OST0000, record the OST0000
+# lock_unused_count, fail ost1 and require the count read afterwards to be
+# lower than the one read before.  The pool is created at the start and
+# removed again before the final comparison.
+test_85b() { #bug 16774
+	lctl set_param -n ldlm.cancel_unused_locks_before_replay "1"
+
+	do_facet mgs $LCTL pool_new $FSNAME.$TESTNAME ||
+		error "unable to create pool $TESTNAME"
+	do_facet mgs $LCTL pool_add $FSNAME.$TESTNAME $FSNAME-OST0000 ||
+		error "unable to add pool $TESTNAME"
+
+	$SETSTRIPE -c 1 -p $FSNAME.$TESTNAME $DIR
+
+	for i in $(seq 100); do
+		dd if=/dev/urandom of=$DIR/$tfile-$i bs=4096 \
+			count=32 >/dev/null 2>&1
+	done
+
+	cancel_lru_locks osc
+
+	for i in $(seq 100); do
+		dd if=$DIR/$tfile-$i of=/dev/null bs=4096 \
+			count=32 >/dev/null 2>&1
+	done
+
+	lov_id=$(lctl dl | grep "clilov")
+	addr=$(echo $lov_id | awk '{print $4}' | awk -F '-' '{print $3}')
+	count=$(lctl get_param \
+		-n ldlm.namespaces.*OST0000*$addr.lock_unused_count)
+	echo "before recovery: unused locks count = $count"
+	# the reads above must have left unused locks behind, otherwise the
+	# comparison after recovery proves nothing; error fires when count
+	# IS zero
+	[ $count != 0 ] || error "unused locks ($count) should not be zero"
+
+	fail ost1
+
+	count2=$(lctl get_param \
+		-n ldlm.namespaces.*OST0000*$addr.lock_unused_count)
+	echo "after recovery: unused locks count = $count2"
+
+	# tear the pool down before the final verdict
+	do_facet mgs $LCTL pool_remove $FSNAME.$TESTNAME $FSNAME-OST0000 ||
+		error "unable to remove pool $TESTNAME"
+	do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME ||
+		error "unable to destroy the pool $TESTNAME"
+
+	if [ $count2 -ge $count ]; then
+		error "unused locks are not canceled"
+	fi
}
run_test 85b "check the cancellation of unused locks during recovery(EXTENT)"
run_test 86 "umount server after clear nid_stats should not hit LBUG"
test_87() {
- do_facet ost1 "lctl set_param -n obdfilter.${ost1_svc}.sync_journal 0"
-
- replay_barrier ost1
- $SETSTRIPE -i 0 -c 1 $DIR/$tfile
- dd if=/dev/urandom of=$DIR/$tfile bs=1024k count=8 || error "Cannot write"
- cksum=`md5sum $DIR/$tfile | awk '{print $1}'`
- cancel_lru_locks osc
- fail ost1
- dd if=$DIR/$tfile of=/dev/null bs=1024k count=8 || error "Cannot read"
- cksum2=`md5sum $DIR/$tfile | awk '{print $1}'`
- if [ $cksum != $cksum2 ] ; then
- error "New checksum $cksum2 does not match original $cksum"
- fi
+ do_facet ost1 "lctl set_param -n obdfilter.${ost1_svc}.sync_journal 0"
+
+ replay_barrier ost1
+ $SETSTRIPE -i 0 -c 1 $DIR/$tfile
+ dd if=/dev/urandom of=$DIR/$tfile bs=1024k count=8 ||
+ error "dd to $DIR/$tfile failed"
+ cksum=$(md5sum $DIR/$tfile | awk '{print $1}')
+ cancel_lru_locks osc
+ fail ost1
+ dd if=$DIR/$tfile of=/dev/null bs=1024k count=8 || error "Cannot read"
+ cksum2=$(md5sum $DIR/$tfile | awk '{print $1}')
+ if [ $cksum != $cksum2 ] ; then
+ error "New checksum $cksum2 does not match original $cksum"
+ fi
}
run_test 87 "write replay"
test_87b() {
- do_facet ost1 "lctl set_param -n obdfilter.${ost1_svc}.sync_journal 0"
-
- replay_barrier ost1
- $SETSTRIPE -i 0 -c 1 $DIR/$tfile
- dd if=/dev/urandom of=$DIR/$tfile bs=1024k count=8 || error "Cannot write"
- sleep 1 # Give it a chance to flush dirty data
- echo TESTTEST | dd of=$DIR/$tfile bs=1 count=8 seek=64
- cksum=`md5sum $DIR/$tfile | awk '{print $1}'`
- cancel_lru_locks osc
- fail ost1
- dd if=$DIR/$tfile of=/dev/null bs=1024k count=8 || error "Cannot read"
- cksum2=`md5sum $DIR/$tfile | awk '{print $1}'`
- if [ $cksum != $cksum2 ] ; then
- error "New checksum $cksum2 does not match original $cksum"
- fi
+ do_facet ost1 "lctl set_param -n obdfilter.${ost1_svc}.sync_journal 0"
+
+ replay_barrier ost1
+ $SETSTRIPE -i 0 -c 1 $DIR/$tfile
+ dd if=/dev/urandom of=$DIR/$tfile bs=1024k count=8 ||
+ error "dd to $DIR/$tfile failed"
+ sleep 1 # Give it a chance to flush dirty data
+ echo TESTTEST | dd of=$DIR/$tfile bs=1 count=8 seek=64
+ cksum=$(md5sum $DIR/$tfile | awk '{print $1}')
+ cancel_lru_locks osc
+ fail ost1
+ dd if=$DIR/$tfile of=/dev/null bs=1024k count=8 || error "Cannot read"
+ cksum2=$(md5sum $DIR/$tfile | awk '{print $1}')
+ if [ $cksum != $cksum2 ] ; then
+ error "New checksum $cksum2 does not match original $cksum"
+ fi
}
run_test 87b "write replay with changed data (checksum resend)"
test_88() { #bug 17485
- mkdir -p $DIR/$tdir
- mkdir -p $TMP/$tdir
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ mkdir -p $TMP/$tdir || error "mkdir $TMP/$tdir failed"
- $SETSTRIPE -i 0 -c 1 $DIR/$tdir || error "$SETSTRIPE"
+ $SETSTRIPE -i 0 -c 1 $DIR/$tdir || error "$SETSTRIPE"
- replay_barrier ost1
- replay_barrier $SINGLEMDS
+ replay_barrier ost1
+ replay_barrier $SINGLEMDS
# exhaust precreations on ost1
local OST=$(ostname_from_index 0)
local mdtosc=$(get_mdtosc_proc_path $SINGLEMDS $OST)
local last_id=$(do_facet $SINGLEMDS lctl get_param -n osc.$mdtosc.prealloc_last_id)
local next_id=$(do_facet $SINGLEMDS lctl get_param -n osc.$mdtosc.prealloc_next_id)
- echo "before test: last_id = $last_id, next_id = $next_id"
+ echo "before test: last_id = $last_id, next_id = $next_id"
- echo "Creating to objid $last_id on ost $OST..."
- createmany -o $DIR/$tdir/f-%d $next_id $((last_id - next_id + 2))
+ echo "Creating to objid $last_id on ost $OST..."
+ createmany -o $DIR/$tdir/f-%d $next_id $((last_id - next_id + 2)) ||
+ error "createmany create files to last_id failed"
- #create some files to use some uncommitted objids
- last_id=$(($last_id + 1))
- createmany -o $DIR/$tdir/f-%d $last_id 8
+ #create some files to use some uncommitted objids
+ last_id=$(($last_id + 1))
+ createmany -o $DIR/$tdir/f-%d $last_id 8 ||
+ error "createmany create files with uncommitted objids failed"
last_id2=$(do_facet $SINGLEMDS lctl get_param -n osc.$mdtosc.prealloc_last_id)
next_id2=$(do_facet $SINGLEMDS lctl get_param -n osc.$mdtosc.prealloc_next_id)
last_id2=$(do_facet $SINGLEMDS lctl get_param -n osc.$mdtosc.prealloc_last_id)
next_id2=$(do_facet $SINGLEMDS lctl get_param -n osc.$mdtosc.prealloc_next_id)
- echo "after recovery: last_id = $last_id2, next_id = $next_id2"
+ echo "after recovery: last_id = $last_id2, next_id = $next_id2"
- # create new files, which should use new objids, and ensure the orphan
- # cleanup phase for ost1 is completed at the same time
- for i in `seq 8`; do
- file_id=$(($last_id + 10 + $i))
- dd if=/dev/urandom of=$DIR/$tdir/f-$file_id bs=4096 count=128
- done
+ # create new files, which should use new objids, and ensure the orphan
+ # cleanup phase for ost1 is completed at the same time
+ for i in $(seq 8); do
+ file_id=$(($last_id + 10 + $i))
+ dd if=/dev/urandom of=$DIR/$tdir/f-$file_id bs=4096 count=128
+ done
- # if the objids were not recreated, then "ls" will failed for -ENOENT
- ls -l $DIR/$tdir/* || error "can't get the status of precreated files"
+ # if the objids were not recreated, then "ls" will fail with -ENOENT
+ ls -l $DIR/$tdir/* || error "can't get the status of precreated files"
- local file_id
- # write into previously created files
- for i in `seq 8`; do
- file_id=$(($last_id + $i))
- dd if=/dev/urandom of=$DIR/$tdir/f-$file_id bs=4096 count=128
- cp -f $DIR/$tdir/f-$file_id $TMP/$tdir/
- done
+ local file_id
+ # write into previously created files
+ for i in $(seq 8); do
+ file_id=$(($last_id + $i))
+ dd if=/dev/urandom of=$DIR/$tdir/f-$file_id bs=4096 count=128
+ cp -f $DIR/$tdir/f-$file_id $TMP/$tdir/
+ done
- # compare the content
- for i in `seq 8`; do
- file_id=$(($last_id + $i))
- cmp $TMP/$tdir/f-$file_id $DIR/$tdir/f-$file_id || error "the content" \
- "of file is modified!"
- done
+ # compare the content
+ for i in $(seq 8); do
+ file_id=$(($last_id + $i))
+ cmp $TMP/$tdir/f-$file_id $DIR/$tdir/f-$file_id ||
+ error "the content of file is modified!"
+ done
- rm -fr $TMP/$tdir
+ rm -fr $TMP/$tdir
}
run_test 88 "MDS should not assign same objid to different files "
test_89() {
+ # Check that removing a file while ost1 is stopped and the MDS fails
+ # over does not leak blocks once the OST and the client are back.
- cancel_lru_locks osc
- mkdir -p $DIR/$tdir
- rm -f $DIR/$tdir/$tfile
- wait_mds_ost_sync
+ cancel_lru_locks osc
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ rm -f $DIR/$tdir/$tfile
+ wait_mds_ost_sync
wait_delete_completed
- BLOCKS1=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
- $SETSTRIPE -i 0 -c 1 $DIR/$tdir/$tfile
- dd if=/dev/zero bs=1M count=10 of=$DIR/$tdir/$tfile
- sync
- stop ost1
- facet_failover $SINGLEMDS
- rm $DIR/$tdir/$tfile
- umount $MOUNT
- mount_facet ost1
- zconf_mount $(hostname) $MOUNT
- client_up || return 1
- wait_mds_ost_sync
+ # third column of the last "df -P" line, sampled before the file is written
+ BLOCKS1=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+ $SETSTRIPE -i 0 -c 1 $DIR/$tdir/$tfile
+ dd if=/dev/zero bs=1M count=10 of=$DIR/$tdir/$tfile
+ sync
+ stop ost1
+ facet_failover $SINGLEMDS
+ rm $DIR/$tdir/$tfile
+ umount $MOUNT
+ mount_facet ost1
+ zconf_mount $(hostname) $MOUNT || error "mount fails"
+ client_up || error "client_up failed"
+ wait_mds_ost_sync
wait_delete_completed
- BLOCKS2=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
- [ $((BLOCKS2 - BLOCKS1)) -le 4 ] || \
+ # same column sampled again after recovery; a difference of up to 4 is
+ # tolerated
+ BLOCKS2=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+ [ $((BLOCKS2 - BLOCKS1)) -le 4 ] ||
error $((BLOCKS2 - BLOCKS1)) blocks leaked
}
fi
fi
- mkdir -p $dir
+ mkdir $dir || error "mkdir $dir failed"
- echo "Create the files"
+ echo "Create the files"
# file "f${index}" striped over 1 OST
# file "all" striped over all OSTs
local uuid=$(ostuuid_from_index $i)
for file in f$i all; do
if [[ $dir/$file != $($LFS find --obd $uuid --name $file $dir) ]]; then
- $GETSTRIPE $dir/file
- error wrong stripe: $file, uuid: $uuid
+ $GETSTRIPE $dir/$file
+ error wrong stripe: $file, uuid: $uuid
fi
done
done
- # Before failing an OST, get its obd name and index
- local varsvc=${ostfail}_svc
- local obd=$(do_facet $ostfail lctl get_param -n obdfilter.${!varsvc}.uuid)
- local index=${obd:(-6):1}
-
- echo "Fail $ostfail $obd, display the list of affected files"
- shutdown_facet $ostfail || return 2
-
- trap "cleanup_90 $ostfail" EXIT INT
- echo "General Query: lfs find $dir"
- local list=$($LFS find $dir)
- echo "$list"
- for (( i=0; i<$OSTCOUNT; i++ )); do
- list_member "$list" $dir/f$i || error_noexit "lfs find $dir: no file f$i"
- done
- list_member "$list" $dir/all || error_noexit "lfs find $dir: no file all"
+ # Before failing an OST, get its obd name and index
+ local varsvc=${ostfail}_svc
+ local obd=$(do_facet $ostfail lctl get_param \
+ -n obdfilter.${!varsvc}.uuid)
+ local index=$(($(facet_number $ostfail) - 1))
+
+ echo "Fail $ostfail $obd, display the list of affected files"
+ shutdown_facet $ostfail || error "shutdown_facet $ostfail failed"
+
+ trap "cleanup_90 $ostfail" EXIT INT
+ echo "General Query: lfs find $dir"
+ local list=$($LFS find $dir)
+ echo "$list"
+ for (( i=0; i<$OSTCOUNT; i++ )); do
+ list_member "$list" $dir/f$i ||
+ error_noexit "lfs find $dir: no file f$i"
+ done
+ list_member "$list" $dir/all ||
+ error_noexit "lfs find $dir: no file all"
- # focus on the missing OST,
- # we expect to see only two files affected: "f$(index)" and "all"
+ # focus on the missing OST,
+ # we expect to see only two files affected: "f$(index)" and "all"
- echo "Querying files on shutdown $ostfail: lfs find --obd $obd"
+ echo "Querying files on shutdown $ostfail: lfs find --obd $obd"
list=$($LFS find --obd $obd $dir)
echo "$list"
for file in all f$index; do
}
run_test 90 "lfs find identifies the missing striped file segments"
-complete $(basename $0) $SECONDS
+# Replay + reconnect: fail ost1 while fail_loc 0x715 is armed with
+# fail_val=40, after writing a small single-stripe file on OST index 0.
+test_93() {
+	local mds_ver=$(lustre_version_code $SINGLEMDS)
+
+	# accepted MDS versions: 2.6.90 and later, or [2.5.4, 2.5.50)
+	if [[ $mds_ver -lt $(version_code 2.6.90) ]]; then
+		if [[ $mds_ver -lt $(version_code 2.5.4) ||
+		      $mds_ver -ge $(version_code 2.5.50) ]]; then
+			skip "Need MDS version 2.5.4+ or 2.6.90+"
+			return
+		fi
+	fi
+
+	cancel_lru_locks osc
+
+	if ! $SETSTRIPE -i 0 -c 1 $DIR/$tfile; then
+		error "$SETSTRIPE $DIR/$tfile failed"
+	fi
+	if ! dd if=/dev/zero of=$DIR/$tfile bs=1024 count=1; then
+		error "dd to $DIR/$tfile failed"
+	fi
+
+	#define OBD_FAIL_TGT_REPLAY_RECONNECT 0x715
+	# Emulate an OST that is still waiting for other clients to finish
+	# recovery: the final ping is queued, but its reply is only sent on
+	# recovery completion. This is done by sleeping before the final
+	# pings are processed.
+	do_facet ost1 "$LCTL set_param fail_val=40"
+	do_facet ost1 "$LCTL set_param fail_loc=0x715"
+	fail ost1
+}
+run_test 93 "replay + reconnect"
+
+# Verify that $DIR/$tdir/striped_dir has exactly two stripes and that 20
+# files can be created under it. Calls error() on any failure.
+striped_dir_check_100() {
+	local striped_dir=$DIR/$tdir/striped_dir
+	local stripe_count
+
+	# assign separately from "local" so a getdirstripe failure is not
+	# masked by the exit status of the "local" builtin
+	stripe_count=$($LFS getdirstripe -c $striped_dir) ||
+		error "getdirstripe -c $striped_dir failed"
+
+	$LFS getdirstripe $striped_dir
+	# [[ ]] evaluates an empty count as 0 instead of raising a syntax error
+	[[ "$stripe_count" -eq 2 ]] || error "$stripe_count != 2"
+
+	createmany -o $striped_dir/f-%d 20 ||
+		error "creation failed under striped dir"
+}
+
+# Create a two-stripe directory while fail_loc 0x1701 is set on mds2, fail
+# mds2 while the creation is in flight, then verify the striped directory.
+test_100a() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	local striped_dir=$DIR/$tdir/striped_dir
+	local MDTIDX=1
+
+	mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+
+	#To make sure MDT1 and MDT0 are connected
+	#otherwise it may create single stripe dir here
+	$LFS setdirstripe -i1 $DIR/$tdir/remote_dir ||
+		error "create remote dir $DIR/$tdir/remote_dir failed"
+
+	#define OBD_FAIL_OUT_UPDATE_NET_REP	0x1701
+	do_facet mds$((MDTIDX+1)) lctl set_param fail_loc=0x1701
+	# run the creation in the background so the MDS can be failed while
+	# the request is outstanding
+	$LFS setdirstripe -i0 -c2 $striped_dir &
+	local CLIENT_PID=$!
+
+	fail mds$((MDTIDX + 1))
+
+	wait $CLIENT_PID || error "striped dir creation failed"
+
+	striped_dir_check_100 || error "striped dir check failed"
+	rm -rf $DIR/$tdir || error "rmdir failed"
+}
+run_test 100a "DNE: create striped dir, drop update rep from MDT1, fail MDT1"
+
+# Create a two-stripe directory while fail_loc 0x119 is set on mds1, fail
+# mds1 while the creation is in flight, then verify the striped directory.
+test_100b() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	local striped_dir=$DIR/$tdir/striped_dir
+	local MDTIDX=1
+
+	mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+
+	#To make sure MDT1 and MDT0 are connected
+	#otherwise it may create single stripe dir here
+	$LFS setdirstripe -i1 $DIR/$tdir/remote_dir ||
+		error "create remote dir $DIR/$tdir/remote_dir failed"
+
+	# OBD_FAIL_MDS_REINT_NET_REP 0x119
+	do_facet mds$MDTIDX lctl set_param fail_loc=0x119
+	# run the creation in the background so the MDS can be failed while
+	# the request is outstanding
+	$LFS mkdir -i0 -c2 $striped_dir &
+
+	local CLIENT_PID=$!
+	fail mds$MDTIDX
+
+	wait $CLIENT_PID || error "striped dir creation failed"
+
+	striped_dir_check_100 || error "striped dir check failed"
+	rm -rf $DIR/$tdir || error "rmdir failed"
+}
+run_test 100b "DNE: create striped dir, fail MDT0"
+
+# Write $num non-empty files under d1 after a replay barrier, abort
+# recovery on $SINGLEMDS, then create $num files under d2 and check that
+# every one of them is empty.
+test_101() { #LU-5648
+	# keep the counters local so they do not leak into later tests
+	local num=1000
+	local i
+
+	mkdir -p $DIR/$tdir/d1
+	mkdir -p $DIR/$tdir/d2
+	touch $DIR/$tdir/file0
+
+	replay_barrier $SINGLEMDS
+	for i in $(seq $num) ; do
+		echo test$i > $DIR/$tdir/d1/file$i
+	done
+
+	fail_abort $SINGLEMDS
+	for i in $(seq $num) ; do
+		touch $DIR/$tdir/d2/file$i
+		# a freshly touched file must have size 0
+		test -s $DIR/$tdir/d2/file$i &&
+			ls -al $DIR/$tdir/d2/file$i && error "file$i's size > 0"
+	done
+
+	rm -rf $DIR/$tdir
+}
+run_test 101 "Shouldn't reassign precreated objs to other files after recovery"
+
+# Launch max_mod_rpcs_in_flight chmod calls in parallel while fail_loc
+# 0x159 is set on the MDS owning the test directory, then verify that
+# every file ended up with mode 0600.
+test_102a() {
+	local idx
+	local facet
+	local num
+	local i
+	local pids pid
+
+	[[ $(lctl get_param mdc.*.import |
+	     grep "connect_flags:.*multi_mod_rpc") ]] ||
+		{ skip "Need MDC with 'multi_mod_rpcs' feature"; return 0; }
+
+	$LFS mkdir -c1 $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	# MDT index of the test directory as 4 hex digits (used in the mdc
+	# parameter name below); the facet is mds<index + 1>
+	idx=$(printf "%04x" $($LFS getdirstripe -i $DIR/$tdir))
+	facet="mds$((0x$idx + 1))"
+
+	# get current value of max_mod_rpcs_in_flight
+	num=$($LCTL get_param -n \
+		mdc.$FSNAME-MDT$idx-mdc-*.max_mod_rpcs_in_flight)
+	# set default value if client does not support multi mod RPCs
+	[ -z "$num" ] && num=1
+
+	echo "creating $num files ..."
+	umask 0022
+	for i in $(seq $num); do
+		touch $DIR/$tdir/file-$i
+	done
+
+	# drop request on MDT to force resend
+	#define OBD_FAIL_MDS_REINT_MULTI_NET 0x159
+	do_facet $facet "$LCTL set_param fail_loc=0x159"
+	echo "launch $num chmod in parallel ($(date +%H:%M:%S)) ..."
+	for i in $(seq $num); do
+		chmod 0600 $DIR/$tdir/file-$i &
+		pids="$pids $!"
+	done
+	sleep 1
+	do_facet $facet "$LCTL set_param fail_loc=0"
+	for pid in $pids; do
+		wait $pid || error "chmod failed"
+	done
+	echo "done ($(date +%H:%M:%S))"
+
+	# check chmod succeed; without the explicit error the result of
+	# checkstat would be silently discarded
+	for i in $(seq $num); do
+		checkstat -vp 0600 $DIR/$tdir/file-$i ||
+			error "checkstat 0600 $DIR/$tdir/file-$i failed"
+	done
+
+	rm -rf $DIR/$tdir
+}
+run_test 102a "check resend (request lost) with multiple modify RPCs in flight"
+
+# Launch max_mod_rpcs_in_flight chmod calls in parallel while fail_loc
+# 0x15a is set on the MDS owning the test directory, then verify that
+# every file ended up with mode 0600.
+test_102b() {
+	local idx
+	local facet
+	local num
+	local i
+	local pids pid
+
+	[[ $(lctl get_param mdc.*.import |
+	     grep "connect_flags:.*multi_mod_rpc") ]] ||
+		{ skip "Need MDC with 'multi_mod_rpcs' feature"; return 0; }
+
+	$LFS mkdir -c1 $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	# MDT index of the test directory as 4 hex digits (used in the mdc
+	# parameter name below); the facet is mds<index + 1>
+	idx=$(printf "%04x" $($LFS getdirstripe -i $DIR/$tdir))
+	facet="mds$((0x$idx + 1))"
+
+	# get current value of max_mod_rpcs_in_flight
+	num=$($LCTL get_param -n \
+		mdc.$FSNAME-MDT$idx-mdc-*.max_mod_rpcs_in_flight)
+	# set default value if client does not support multi mod RPCs
+	[ -z "$num" ] && num=1
+
+	echo "creating $num files ..."
+	umask 0022
+	for i in $(seq $num); do
+		touch $DIR/$tdir/file-$i
+	done
+
+	# drop reply on MDT to force reconstruction
+	#define OBD_FAIL_MDS_REINT_MULTI_NET_REP 0x15a
+	do_facet $facet "$LCTL set_param fail_loc=0x15a"
+	echo "launch $num chmod in parallel ($(date +%H:%M:%S)) ..."
+	for i in $(seq $num); do
+		chmod 0600 $DIR/$tdir/file-$i &
+		pids="$pids $!"
+	done
+	sleep 1
+	do_facet $facet "$LCTL set_param fail_loc=0"
+	for pid in $pids; do
+		wait $pid || error "chmod failed"
+	done
+	echo "done ($(date +%H:%M:%S))"
+
+	# check chmod succeed; without the explicit error the result of
+	# checkstat would be silently discarded
+	for i in $(seq $num); do
+		checkstat -vp 0600 $DIR/$tdir/file-$i ||
+			error "checkstat 0600 $DIR/$tdir/file-$i failed"
+	done
+
+	rm -rf $DIR/$tdir
+}
+run_test 102b "check resend (reply lost) with multiple modify RPCs in flight"
+
+# After a replay barrier, launch max_mod_rpcs_in_flight chmod calls in
+# parallel while fail_loc 0x15a is set, fail the MDS, then verify that
+# every file ended up with mode 0600.
+test_102c() {
+	local idx
+	local facet
+	local num
+	local i
+	local pids pid
+
+	[[ $(lctl get_param mdc.*.import |
+	     grep "connect_flags:.*multi_mod_rpc") ]] ||
+		{ skip "Need MDC with 'multi_mod_rpcs' feature"; return 0; }
+
+	$LFS mkdir -c1 $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	# MDT index of the test directory as 4 hex digits (used in the mdc
+	# parameter name below); the facet is mds<index + 1>
+	idx=$(printf "%04x" $($LFS getdirstripe -i $DIR/$tdir))
+	facet="mds$((0x$idx + 1))"
+
+	# get current value of max_mod_rpcs_in_flight
+	num=$($LCTL get_param -n \
+		mdc.$FSNAME-MDT$idx-mdc-*.max_mod_rpcs_in_flight)
+	# set default value if client does not support multi mod RPCs
+	[ -z "$num" ] && num=1
+
+	echo "creating $num files ..."
+	umask 0022
+	for i in $(seq $num); do
+		touch $DIR/$tdir/file-$i
+	done
+
+	replay_barrier $facet
+
+	# drop reply on MDT
+	#define OBD_FAIL_MDS_REINT_MULTI_NET_REP 0x15a
+	do_facet $facet "$LCTL set_param fail_loc=0x15a"
+	echo "launch $num chmod in parallel ($(date +%H:%M:%S)) ..."
+	for i in $(seq $num); do
+		chmod 0600 $DIR/$tdir/file-$i &
+		pids="$pids $!"
+	done
+	sleep 1
+	do_facet $facet "$LCTL set_param fail_loc=0"
+
+	# fail MDT
+	fail $facet
+
+	for pid in $pids; do
+		wait $pid || error "chmod failed"
+	done
+	echo "done ($(date +%H:%M:%S))"
+
+	# check chmod succeed; without the explicit error the result of
+	# checkstat would be silently discarded
+	for i in $(seq $num); do
+		checkstat -vp 0600 $DIR/$tdir/file-$i ||
+			error "checkstat 0600 $DIR/$tdir/file-$i failed"
+	done
+
+	rm -rf $DIR/$tdir
+}
+run_test 102c "check replay w/o reconstruction with multiple mod RPCs in flight"
+
+# Launch max_mod_rpcs_in_flight chmod calls in parallel while fail_loc
+# 0x15a is set, sync the MDS, fail it, then verify that every file ended
+# up with mode 0600.
+test_102d() {
+	local idx
+	local facet
+	local num
+	local i
+	local pids pid
+
+	[[ $(lctl get_param mdc.*.import |
+	     grep "connect_flags:.*multi_mod_rpc") ]] ||
+		{ skip "Need MDC with 'multi_mod_rpcs' feature"; return 0; }
+
+	$LFS mkdir -c1 $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	# MDT index of the test directory as 4 hex digits (used in the mdc
+	# parameter name below); the facet is mds<index + 1>
+	idx=$(printf "%04x" $($LFS getdirstripe -i $DIR/$tdir))
+	facet="mds$((0x$idx + 1))"
+
+	# get current value of max_mod_rpcs_in_flight
+	num=$($LCTL get_param -n \
+		mdc.$FSNAME-MDT$idx-mdc-*.max_mod_rpcs_in_flight)
+	# set default value if client does not support multi mod RPCs
+	[ -z "$num" ] && num=1
+
+	echo "creating $num files ..."
+	umask 0022
+	for i in $(seq $num); do
+		touch $DIR/$tdir/file-$i
+	done
+
+	# drop reply on MDT
+	#define OBD_FAIL_MDS_REINT_MULTI_NET_REP 0x15a
+	do_facet $facet "$LCTL set_param fail_loc=0x15a"
+	echo "launch $num chmod in parallel ($(date +%H:%M:%S)) ..."
+	for i in $(seq $num); do
+		chmod 0600 $DIR/$tdir/file-$i &
+		pids="$pids $!"
+	done
+	sleep 1
+
+	# write MDT transactions to disk
+	do_facet $facet "sync; sync; sync"
+
+	do_facet $facet "$LCTL set_param fail_loc=0"
+
+	# fail MDT
+	fail $facet
+
+	for pid in $pids; do
+		wait $pid || error "chmod failed"
+	done
+	echo "done ($(date +%H:%M:%S))"
+
+	# check chmod succeed; without the explicit error the result of
+	# checkstat would be silently discarded
+	for i in $(seq $num); do
+		checkstat -vp 0600 $DIR/$tdir/file-$i ||
+			error "checkstat 0600 $DIR/$tdir/file-$i failed"
+	done
+
+	rm -rf $DIR/$tdir
+}
+run_test 102d "check replay & reconstruction with multiple mod RPCs in flight"
+
+# With fail_loc 0x80000162 set on mds1, create and remove 30 files, then
+# fail mds1.
+test_103() {
+	local workdir=$DIR/$tdir
+
+	if remote_mds_nodsh; then
+		skip "remote MDS with nodsh" && return
+	fi
+
+	#define OBD_FAIL_MDS_TRACK_OVERFLOW 0x162
+	do_facet mds1 $LCTL set_param fail_loc=0x80000162
+
+	mkdir -p $workdir
+	if ! createmany -o $workdir/t- 30; then
+		error "create files on remote directory failed"
+	fi
+	sync
+	rm -rf $workdir/t-*
+	sync
+
+	#MDS should crash with tr->otr_next_id overflow
+	fail mds1
+}
+run_test 103 "Check otr_next_id overflow"
+
+
+# Verify that $DIR/$tdir/striped_dir exists as a directory and that its
+# stripe count equals $MDSCOUNT. Calls error() on any mismatch.
+check_striped_dir_110()
+{
+	$CHECKSTAT -t dir $DIR/$tdir/striped_dir ||
+		error "create striped dir failed"
+	local stripe_count=$($LFS getdirstripe -c $DIR/$tdir/striped_dir)
+	# report the value actually compared against (MDSCOUNT), not a
+	# hard-coded 2
+	[ $stripe_count -eq $MDSCOUNT ] ||
+		error "$stripe_count != $MDSCOUNT after recovery"
+}
+
+# Create a striped directory after a replay barrier on mds1, fail mds1
+# and verify the striped directory.
+test_110a() {
+	local parent=$DIR/$tdir
+
+	if [ $MDSCOUNT -lt 2 ]; then
+		skip "needs >= 2 MDTs" && return 0
+	fi
+	if [ $FAILURE_MODE == "HARD" ] &&
+	   [ "$(facet_host mds1)" == "$(facet_host mds2)" ]; then
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+			return 0
+	fi
+
+	mkdir -p $parent
+	replay_barrier mds1
+	$LFS mkdir -i1 -c$MDSCOUNT $parent/striped_dir
+	fail mds1
+
+	if ! check_striped_dir_110; then
+		error "check striped_dir failed"
+	fi
+	if ! rm -rf $parent; then
+		error "rmdir failed"
+	fi
+
+	return 0
+}
+run_test 110a "DNE: create striped dir, fail MDT1"
+
+# Create a striped directory after a replay barrier on mds1, unmount the
+# client, fail mds1, remount and verify the striped directory.
+test_110b() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	mkdir -p $DIR/$tdir
+	replay_barrier mds1
+	$LFS mkdir -i1 -c$MDSCOUNT $DIR/$tdir/striped_dir
+	umount $MOUNT
+	fail mds1
+	# report remount problems explicitly instead of returning silently
+	zconf_mount $(hostname) $MOUNT || error "mount fails"
+	client_up || error "client_up failed"
+
+	check_striped_dir_110 || error "check striped_dir failed"
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+
+	return 0
+}
+run_test 110b "DNE: create striped dir, fail MDT1 and client"
+
+# Create a striped directory after a replay barrier on mds2, fail mds2
+# and verify the striped directory.
+test_110c() {
+	local parent=$DIR/$tdir
+
+	if [ $MDSCOUNT -lt 2 ]; then
+		skip "needs >= 2 MDTs" && return 0
+	fi
+	if [ $FAILURE_MODE == "HARD" ] &&
+	   [ "$(facet_host mds1)" == "$(facet_host mds2)" ]; then
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+			return 0
+	fi
+
+	mkdir -p $parent
+	replay_barrier mds2
+	$LFS mkdir -i1 -c$MDSCOUNT $parent/striped_dir
+	fail mds2
+
+	if ! check_striped_dir_110; then
+		error "check striped_dir failed"
+	fi
+
+	if ! rm -rf $parent; then
+		error "rmdir failed"
+	fi
+
+	return 0
+}
+run_test 110c "DNE: create striped dir, fail MDT2"
+
+# Create a striped directory after a replay barrier on mds2, unmount the
+# client, fail mds2, remount and verify the striped directory.
+test_110d() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	mkdir -p $DIR/$tdir
+	replay_barrier mds2
+	$LFS mkdir -i1 -c$MDSCOUNT $DIR/$tdir/striped_dir
+	umount $MOUNT
+	fail mds2
+	# report remount problems explicitly instead of returning silently
+	zconf_mount $(hostname) $MOUNT || error "mount fails"
+	client_up || error "client_up failed"
+
+	check_striped_dir_110 || error "check striped_dir failed"
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+
+	return 0
+}
+run_test 110d "DNE: create striped dir, fail MDT2 and client"
+
+# Create a striped directory after a replay barrier on mds2, unmount the
+# client, set a barrier on mds1, fail both MDSs, remount and verify the
+# striped directory.
+test_110e() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	mkdir -p $DIR/$tdir
+	replay_barrier mds2
+	$LFS mkdir -i1 -c$MDSCOUNT $DIR/$tdir/striped_dir
+	umount $MOUNT
+	replay_barrier mds1
+	fail mds1,mds2
+	# report remount problems explicitly instead of returning silently
+	zconf_mount $(hostname) $MOUNT || error "mount fails"
+	client_up || error "client_up failed"
+
+	check_striped_dir_110 || error "check striped_dir failed"
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+
+	return 0
+}
+run_test 110e "DNE: create striped dir, uncommit on MDT2, fail client/MDT1/MDT2"
+
+# Create a striped directory after replay barriers on mds1 and mds2, fail
+# both MDSs and verify the striped directory.
+test_110f() {
+	local parent=$DIR/$tdir
+
+	if [ $MDSCOUNT -lt 2 ]; then
+		skip "needs >= 2 MDTs" && return 0
+	fi
+	if [ $FAILURE_MODE == "HARD" ] &&
+	   [ "$(facet_host mds1)" == "$(facet_host mds2)" ]; then
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+			return 0
+	fi
+
+	mkdir -p $parent
+	replay_barrier mds1
+	replay_barrier mds2
+	$LFS mkdir -i1 -c$MDSCOUNT $parent/striped_dir
+	fail mds1,mds2
+
+	if ! check_striped_dir_110; then
+		error "check striped_dir failed"
+	fi
+
+	if ! rm -rf $parent; then
+		error "rmdir failed"
+	fi
+
+	return 0
+}
+run_test 110f "DNE: create striped dir, fail MDT1/MDT2"
+
+# Create a striped directory after a replay barrier on mds1, unmount the
+# client, set a barrier on mds2, fail both MDSs, remount and verify the
+# striped directory.
+test_110g() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	mkdir -p $DIR/$tdir
+	replay_barrier mds1
+	$LFS mkdir -i1 -c$MDSCOUNT $DIR/$tdir/striped_dir
+	umount $MOUNT
+	replay_barrier mds2
+	fail mds1,mds2
+	# report remount problems explicitly instead of returning silently
+	zconf_mount $(hostname) $MOUNT || error "mount fails"
+	client_up || error "client_up failed"
+
+	check_striped_dir_110 || error "check striped_dir failed"
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+
+	return 0
+}
+run_test 110g "DNE: create striped dir, uncommit on MDT1, fail client/MDT1/MDT2"
+
+# Remove a striped directory after a replay barrier on mds1, fail mds1
+# and check that the directory is gone.
+test_111a() {
+	local sdir=$DIR/$tdir/striped_dir
+
+	if [ $MDSCOUNT -lt 2 ]; then
+		skip "needs >= 2 MDTs" && return 0
+	fi
+	if [ $FAILURE_MODE == "HARD" ] &&
+	   [ "$(facet_host mds1)" == "$(facet_host mds2)" ]; then
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+			return 0
+	fi
+
+	mkdir -p $DIR/$tdir
+	$LFS mkdir -i1 -c2 $sdir
+	replay_barrier mds1
+	rm -rf $sdir
+	fail mds1
+
+	if $CHECKSTAT -t dir $sdir; then
+		error "striped dir still exists"
+	fi
+	return 0
+}
+run_test 111a "DNE: unlink striped dir, fail MDT1"
+
+# Remove a striped directory after a replay barrier on mds2, unmount the
+# client, fail mds2, remount and check that the directory is gone.
+test_111b() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	mkdir -p $DIR/$tdir
+	$LFS mkdir -i1 -c2 $DIR/$tdir/striped_dir
+	replay_barrier mds2
+	rm -rf $DIR/$tdir/striped_dir
+	umount $MOUNT
+	fail mds2
+	# report remount problems explicitly instead of returning silently
+	zconf_mount $(hostname) $MOUNT || error "mount fails"
+	client_up || error "client_up failed"
+
+	$CHECKSTAT -t dir $DIR/$tdir/striped_dir &&
+		error "striped dir still exists"
+	return 0
+}
+run_test 111b "DNE: unlink striped dir, fail MDT2"
+
+# Remove a striped directory after a replay barrier on mds1, unmount the
+# client, set a barrier on mds2, fail both MDSs, remount and check that
+# the directory is gone.
+test_111c() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	mkdir -p $DIR/$tdir
+	$LFS mkdir -i1 -c2 $DIR/$tdir/striped_dir
+	replay_barrier mds1
+	rm -rf $DIR/$tdir/striped_dir
+	umount $MOUNT
+	replay_barrier mds2
+	fail mds1,mds2
+	# report remount problems explicitly instead of returning silently
+	zconf_mount $(hostname) $MOUNT || error "mount fails"
+	client_up || error "client_up failed"
+	$CHECKSTAT -t dir $DIR/$tdir/striped_dir &&
+		error "striped dir still exists"
+	return 0
+}
+run_test 111c "DNE: unlink striped dir, uncommit on MDT1, fail client/MDT1/MDT2"
+
+# Remove a striped directory after a replay barrier on mds2, unmount the
+# client, set a barrier on mds1, fail both MDSs, remount and check that
+# the directory is gone.
+test_111d() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	mkdir -p $DIR/$tdir
+	$LFS mkdir -i1 -c2 $DIR/$tdir/striped_dir
+	replay_barrier mds2
+	rm -rf $DIR/$tdir/striped_dir
+	umount $MOUNT
+	replay_barrier mds1
+	fail mds1,mds2
+	# report remount problems explicitly instead of returning silently
+	zconf_mount $(hostname) $MOUNT || error "mount fails"
+	client_up || error "client_up failed"
+	$CHECKSTAT -t dir $DIR/$tdir/striped_dir &&
+		error "striped dir still exists"
+
+	return 0
+}
+run_test 111d "DNE: unlink striped dir, uncommit on MDT2, fail client/MDT1/MDT2"
+
+# Remove a striped directory between replay barriers on mds2 and mds1,
+# fail both MDSs and check that the directory is gone.
+test_111e() {
+	local sdir=$DIR/$tdir/striped_dir
+
+	if [ $MDSCOUNT -lt 2 ]; then
+		skip "needs >= 2 MDTs" && return 0
+	fi
+	if [ $FAILURE_MODE == "HARD" ] &&
+	   [ "$(facet_host mds1)" == "$(facet_host mds2)" ]; then
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+			return 0
+	fi
+
+	mkdir -p $DIR/$tdir
+	$LFS mkdir -i1 -c2 $sdir
+	replay_barrier mds2
+	rm -rf $sdir
+	replay_barrier mds1
+	fail mds1,mds2
+
+	if $CHECKSTAT -t dir $sdir; then
+		error "striped dir still exists"
+	fi
+	return 0
+}
+run_test 111e "DNE: unlink striped dir, uncommit on MDT2, fail MDT1/MDT2"
+
+# Remove a striped directory between replay barriers on mds1 and mds2,
+# fail both MDSs and check that the directory is gone.
+test_111f() {
+	local sdir=$DIR/$tdir/striped_dir
+
+	if [ $MDSCOUNT -lt 2 ]; then
+		skip "needs >= 2 MDTs" && return 0
+	fi
+	if [ $FAILURE_MODE == "HARD" ] &&
+	   [ "$(facet_host mds1)" == "$(facet_host mds2)" ]; then
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+			return 0
+	fi
+
+	mkdir -p $DIR/$tdir
+	$LFS mkdir -i1 -c2 $sdir
+	replay_barrier mds1
+	rm -rf $sdir
+	replay_barrier mds2
+	fail mds1,mds2
+
+	if $CHECKSTAT -t dir $sdir; then
+		error "striped dir still exists"
+	fi
+	return 0
+}
+run_test 111f "DNE: unlink striped dir, uncommit on MDT1, fail MDT1/MDT2"
+
+# Remove a striped directory after replay barriers on mds1 and mds2, fail
+# both MDSs and check that the directory is gone.
+test_111g() {
+	local sdir=$DIR/$tdir/striped_dir
+
+	if [ $MDSCOUNT -lt 2 ]; then
+		skip "needs >= 2 MDTs" && return 0
+	fi
+	if [ $FAILURE_MODE == "HARD" ] &&
+	   [ "$(facet_host mds1)" == "$(facet_host mds2)" ]; then
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+			return 0
+	fi
+
+	mkdir -p $DIR/$tdir
+	$LFS mkdir -i1 -c2 $sdir
+	replay_barrier mds1
+	replay_barrier mds2
+	rm -rf $sdir
+	fail mds1,mds2
+
+	if $CHECKSTAT -t dir $sdir; then
+		error "striped dir still exists"
+	fi
+	return 0
+}
+run_test 111g "DNE: unlink striped dir, fail MDT1/MDT2"
+
+# Build the directory layout used by the test_112* rename tests:
+# src_dir/src_child (MDT index 1) holding file "a", and
+# tgt_dir (MDT index 2) holding tgt_child (MDT index 3).
+test_112_rename_prepare() {
+	local src=$DIR/$tdir/src_dir
+	local tgt=$DIR/$tdir/tgt_dir
+
+	mkdir -p $src
+	if ! $LFS mkdir -i 1 $src/src_child; then
+		error "create remote source failed"
+	fi
+
+	touch $src/src_child/a
+
+	if ! $LFS mkdir -i 2 $tgt; then
+		error "create remote target dir failed"
+	fi
+
+	if ! $LFS mkdir -i 3 $tgt/tgt_child; then
+		error "create remote target child failed"
+	fi
+}
+
+# Check the result of the test_112* rename: src_child must no longer
+# exist and file "a" must be present under tgt_dir/tgt_child.
+test_112_check() {
+	local top=$DIR/$tdir
+
+	find $top/
+	if $CHECKSTAT -t dir $top/src_dir/src_child; then
+		error "src_child still exists after rename"
+	fi
+
+	if ! $CHECKSTAT -t file $top/tgt_dir/tgt_child/a; then
+		error "missing file(a) after rename"
+	fi
+}
+
+# Common body of test_112a..test_112n.
+# usage: test_112_rename_fail <facet>[,<facet>...]
+# Prepares the rename layout, sets a replay barrier on every listed MDS
+# facet (in the order given), renames src_child onto tgt_child, fails all
+# listed facets in one call and finally checks the result of the rename.
+test_112_rename_fail() {
+	local facets=$1
+	local facet
+
+	[ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	test_112_rename_prepare
+	# split the comma-separated list to barrier each facet individually
+	for facet in ${facets//,/ }; do
+		replay_barrier $facet
+	done
+
+	mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child ||
+		error "rename dir cross MDT failed!"
+
+	fail $facets
+
+	test_112_check
+	rm -rf $DIR/$tdir || error "rmdir failed"
+}
+
+test_112a() {
+	test_112_rename_fail mds1
+}
+run_test 112a "DNE: cross MDT rename, fail MDT1"
+
+test_112b() {
+	test_112_rename_fail mds2
+}
+run_test 112b "DNE: cross MDT rename, fail MDT2"
+
+test_112c() {
+	test_112_rename_fail mds3
+}
+run_test 112c "DNE: cross MDT rename, fail MDT3"
+
+test_112d() {
+	test_112_rename_fail mds4
+}
+run_test 112d "DNE: cross MDT rename, fail MDT4"
+
+test_112e() {
+	test_112_rename_fail mds1,mds2
+}
+run_test 112e "DNE: cross MDT rename, fail MDT1 and MDT2"
+
+test_112f() {
+	test_112_rename_fail mds1,mds3
+}
+run_test 112f "DNE: cross MDT rename, fail MDT1 and MDT3"
+
+test_112g() {
+	test_112_rename_fail mds1,mds4
+}
+run_test 112g "DNE: cross MDT rename, fail MDT1 and MDT4"
+
+test_112h() {
+	test_112_rename_fail mds2,mds3
+}
+run_test 112h "DNE: cross MDT rename, fail MDT2 and MDT3"
+
+test_112i() {
+	test_112_rename_fail mds2,mds4
+}
+run_test 112i "DNE: cross MDT rename, fail MDT2 and MDT4"
+
+test_112j() {
+	test_112_rename_fail mds3,mds4
+}
+run_test 112j "DNE: cross MDT rename, fail MDT3 and MDT4"
+
+test_112k() {
+	test_112_rename_fail mds1,mds2,mds3
+}
+run_test 112k "DNE: cross MDT rename, fail MDT1,MDT2,MDT3"
+
+test_112l() {
+	test_112_rename_fail mds1,mds2,mds4
+}
+run_test 112l "DNE: cross MDT rename, fail MDT1,MDT2,MDT4"
+
+test_112m() {
+	test_112_rename_fail mds1,mds3,mds4
+}
+run_test 112m "DNE: cross MDT rename, fail MDT1,MDT3,MDT4"
+
+test_112n() {
+	test_112_rename_fail mds2,mds3,mds4
+}
+run_test 112n "DNE: cross MDT rename, fail MDT2,MDT3,MDT4"
+
+# For each MDT in turn (starting with index 1 and ending with index 0):
+# set a replay barrier, create five striped directories on it, fail the
+# MDS, check that the directories exist and remove them again.
+test_115() {
+	if [ $MDSCOUNT -lt 2 ]; then
+		skip "needs >= 2 MDTs" && return 0
+	fi
+	if [ $FAILURE_MODE == "HARD" ] &&
+	   [ "$(facet_host mds1)" == "$(facet_host mds2)" ]; then
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+			return 0
+	fi
+
+	local round
+	local index
+	local facet
+	local i
+
+	mkdir -p $DIR/$tdir
+	for ((round = 1; round <= MDSCOUNT; round++)); do
+		index=$((round % MDSCOUNT))
+		facet=mds$((index + 1))
+
+		replay_barrier $facet
+		for i in 0 1 2 3 4; do
+			if ! test_mkdir -i$index -c$MDSCOUNT \
+				$DIR/$tdir/test_$i; then
+				error "create striped dir $DIR/$tdir/test_$i"
+			fi
+		done
+
+		fail $facet
+		for i in 0 1 2 3 4; do
+			if ! checkstat -t dir $DIR/$tdir/test_$i; then
+				error "$DIR/$tdir/test_$i does not exist!"
+			fi
+		done
+		rm -rf $DIR/$tdir/test_* ||
+			error "rmdir fails"
+	done
+}
+run_test 115 "failover for create/unlink striped directory"
+
+# Create a striped directory with fail_loc 0x80001702 set on mds1 after a
+# replay barrier, fail mds1 and check that the directory exists.
+test_116a() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] &&
+		skip "Do not support large update log before 2.7.55" &&
+		return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	mkdir -p $DIR/$tdir
+	replay_barrier mds1
+
+	# OBD_FAIL_SPLIT_UPDATE_REC	0x1702
+	do_facet mds1 "lctl set_param fail_loc=0x80001702"
+	$LFS setdirstripe -c$MDSCOUNT $DIR/$tdir/striped_dir
+
+	fail mds1
+	$CHECKSTAT -t dir $DIR/$tdir/striped_dir ||
+		error "striped_dir does not exist"
+}
+run_test 116a "large update log master MDT recovery"
+
+# Create a striped directory with fail_loc 0x80001702 set on mds2 after a
+# replay barrier, fail mds2 and check that the directory exists.
+test_116b() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] &&
+		skip "Do not support large update log before 2.7.55" &&
+		return 0
+
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	mkdir -p $DIR/$tdir
+	replay_barrier mds2
+
+	# OBD_FAIL_SPLIT_UPDATE_REC	0x1702
+	do_facet mds2 "lctl set_param fail_loc=0x80001702"
+	$LFS setdirstripe -c$MDSCOUNT $DIR/$tdir/striped_dir
+
+	fail mds2
+	$CHECKSTAT -t dir $DIR/$tdir/striped_dir ||
+		error "striped_dir does not exist"
+}
+run_test 116b "large update log slave MDT recovery"
+
+# Create two striped directories, set a replay barrier on every MDS,
+# remove both directories and fail all MDSs in a single call.
+test_117() {
+	[ $MDSCOUNT -lt 4 ] && skip "needs >= 4 MDTs" && return 0
+	([ $FAILURE_MODE == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+	local index
+	local mds_indexs
+
+	mkdir -p $DIR/$tdir
+	$LFS setdirstripe -i0 -c$MDSCOUNT $DIR/$tdir/remote_dir
+	$LFS setdirstripe -i1 -c$MDSCOUNT $DIR/$tdir/remote_dir_1
+	sleep 2
+
+	# Let's set rdonly on all MDTs, so the client will send
+	# replay requests to all MDTs and replay these requests
+	# at the same time. This test verifies that recovery
+	# does not deadlock in this case, LU-7531.
+	for ((index = 0; index < $((MDSCOUNT)); index++)); do
+		replay_barrier mds$((index + 1))
+		# build "mds1,mds2,..."; the separator is only emitted once
+		# the list is non-empty
+		mds_indexs="${mds_indexs:+$mds_indexs,}mds$((index + 1))"
+	done
+
+	rm -rf $DIR/$tdir/remote_dir
+	rm -rf $DIR/$tdir/remote_dir_1
+
+	fail $mds_indexs
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+}
+run_test 117 "DNE: cross MDT unlink, fail MDT1 and MDT2"
+
+test_118() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ] &&
+ skip "Do not support large update log before 2.7.64" &&
+ return 0
+
+ mkdir -p $DIR/$tdir
+
+ $LFS setdirstripe -c2 $DIR/$tdir/striped_dir ||
+ error "setdirstripe fails"
+ $LFS setdirstripe -c2 $DIR/$tdir/striped_dir1 ||
+ error "setdirstripe fails 1"
+ rm -rf $DIR/$tdir/striped_dir* || error "rmdir fails"
+
+ # OBD_FAIL_INVALIDATE_UPDATE 0x1705
+ do_facet mds1 "lctl set_param fail_loc=0x1705"
+ $LFS setdirstripe -c2 $DIR/$tdir/striped_dir
+ $LFS setdirstripe -c2 $DIR/$tdir/striped_dir1
+ do_facet mds1 "lctl set_param fail_loc=0x0"
+
+ replay_barrier mds1
+ $LFS setdirstripe -c2 $DIR/$tdir/striped_dir
+ $LFS setdirstripe -c2 $DIR/$tdir/striped_dir1
+ fail mds1
+
+ true
+}
+run_test 118 "invalidate osp update will not cause update log corruption"
+
+test_119() {
+	# Create 20 striped directories behind a replay barrier on mds1,
+	# restart mds1 with a shortened recovery_time_hard and a replay
+	# delay longer than that limit, then verify that every striped
+	# directory still reports a stripe count of 2.
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ] &&
+		skip "Do not support large update log before 2.7.64" &&
+		return 0
+	local stripe_count
+	# Loop counter kept local so it cannot clobber a caller's variable.
+	local i
+	# Saved so the hard timeout can be restored after the remount.
+	local hard_timeout=$(do_facet mds1 \
+		"lctl get_param -n mdt.$FSNAME-MDT0000.recovery_time_hard")
+
+	local clients=${CLIENTS:-$HOSTNAME}
+	local time_min=$(recovery_time_min)
+
+	mkdir -p $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	# Scratch create/remove before the barrier, kept from the original.
+	# NOTE(review): its purpose is not evident from this function.
+	mkdir $DIR/$tdir/tmp
+	rmdir $DIR/$tdir/tmp
+
+	replay_barrier mds1
+	mkdir $DIR/$tdir/dir_1
+	for ((i = 0; i < 20; i++)); do
+		$LFS setdirstripe -c2 $DIR/$tdir/stripe_dir-$i
+	done
+
+	stop mds1
+	change_active mds1
+	wait_for_facet mds1
+
+	#define OBD_FAIL_TGT_REPLAY_DELAY 0x714
+	do_facet mds1 $LCTL set_param fail_loc=0x80000714
+	# Sleep (timeout + 5), so the MDS will evict the client exports,
+	# but DNE update recovery will keep going.
+	do_facet mds1 $LCTL set_param fail_val=$((time_min + 5))
+
+	mount_facet mds1 "-o recovery_time_hard=$time_min"
+
+	wait_clients_import_state "$clients" mds1 FULL
+
+	clients_up || clients_up || error "failover df: $?"
+
+	# Revert the hard timeout to the value saved above.
+	do_facet mds1 $LCTL set_param \
+		mdt.$FSNAME-MDT0000.recovery_time_hard=$hard_timeout
+
+	for ((i = 0; i < 20; i++)); do
+		stripe_count=$($LFS getdirstripe -c $DIR/$tdir/stripe_dir-$i)
+		# Quoted: an empty result must compare as a mismatch rather
+		# than turn the test expression into a syntax error.
+		[ "$stripe_count" == 2 ] || {
+			error "stripe_dir-$i creation replay fails"
+			break
+		}
+	done
+}
+run_test 119 "timeout of normal replay does not cause DNE replay fails "
+
+test_120() {
+	# Create 20 plain and 20 striped directories behind a no-sync
+	# replay barrier on mds1, run fail_abort on mds1, and verify that
+	# none of those directories exist afterwards.
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ] &&
+		skip "Do not support large update log before 2.7.64" &&
+		return 0
+	# Loop counter kept local so it cannot clobber a caller's variable.
+	local i
+
+	mkdir $DIR/$tdir
+	replay_barrier_nosync mds1
+	for ((i = 0; i < 20; i++)); do
+		mkdir $DIR/$tdir/dir-$i || {
+			error "create dir-$i fails"
+			break
+		}
+		$LFS setdirstripe -c2 $DIR/$tdir/stripe_dir-$i || {
+			error "create stripe_dir-$i fails"
+			break
+		}
+	done
+
+	fail_abort mds1
+
+	# Nothing created after the barrier may be visible any more.
+	for ((i = 0; i < 20; i++)); do
+		[ ! -e "$DIR/$tdir/dir-$i" ] || {
+			error "dir-$i still exists"
+			break
+		}
+		[ ! -e "$DIR/$tdir/stripe_dir-$i" ] || {
+			error "stripe_dir-$i still exists"
+			break
+		}
+	done
+}
+run_test 120 "DNE fail abort should stop both normal and DNE replay"
+
+complete $SECONDS # pass the shell's elapsed-seconds counter to complete
check_and_cleanup_lustre
exit_status