require_dsh_mds || exit 0
# Skip these tests
-# bug number: 17466 18857 LU1867
-ALWAYS_EXCEPT="61d 33a 33b 89 $REPLAY_SINGLE_EXCEPT"
+# bug number: 17466 18857 LU-1473
+ALWAYS_EXCEPT="61d 33a 33b 62 $REPLAY_SINGLE_EXCEPT"
# 63 min 7 min AT AT AT AT"
[ "$SLOW" = "no" ] && EXCEPT_SLOW="1 2 3 4 6 12 16 44a 44b 65 66 67 68"
+[ $(facet_fstype $SINGLEMDS) = "zfs" ] &&
+# bug number for skipped test: LU-1867 LU-3127
+ ALWAYS_EXCEPT="$ALWAYS_EXCEPT 89 73b"
+
build_test_filter
check_and_setup_lustre
zconf_mount `hostname` $MOUNT || error "mount fails"
client_up || error "post-failover df failed"
# file shouldn't exist if replay-barrier works as expected
- rm $DIR/$tfile && return 1
+ rm $DIR/$tfile && error "File exists and it shouldn't"
return 0
}
run_test 0c "check replay-barrier"
run_test 20a "|X| open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)"
test_20b() { # bug 10480
- BEFOREUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'`
+ local wait_timeout=$((TIMEOUT * 4))
+ local BEFOREUSED
+ local AFTERUSED
- dd if=/dev/zero of=$DIR/$tfile bs=4k count=10000 &
- pid=$!
- while [ ! -e $DIR/$tfile ] ; do
- usleep 60 # give dd a chance to start
- done
+ BEFOREUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'`
+ dd if=/dev/zero of=$DIR/$tfile bs=4k count=10000 &
+ pid=$!
+ while [ ! -e $DIR/$tfile ] ; do
+ usleep 60 # give dd a chance to start
+ done
- $GETSTRIPE $DIR/$tfile || return 1
- rm -f $DIR/$tfile || return 2 # make it an orphan
- mds_evict_client
- client_up || client_up || true # reconnect
+ $GETSTRIPE $DIR/$tfile || return 1
+ rm -f $DIR/$tfile || return 2 # make it an orphan
+ mds_evict_client
+ client_up || client_up || true # reconnect
- fail $SINGLEMDS # start orphan recovery
- wait_recovery_complete $SINGLEMDS || error "MDS recovery not done"
- wait_mds_ost_sync || return 3
- AFTERUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'`
- log "before $BEFOREUSED, after $AFTERUSED"
- [ $AFTERUSED -gt $((BEFOREUSED + 20)) ] && \
- error "after $AFTERUSED > before $BEFOREUSED"
- return 0
+ do_facet $SINGLEMDS "lctl set_param -n osd*.*MDT*.force_sync 1"
+
+ fail $SINGLEMDS # start orphan recovery
+ wait_recovery_complete $SINGLEMDS || error "MDS recovery not done"
+ wait_delete_completed_mds $wait_timeout || return 3
+
+ AFTERUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }')
+ log "before $BEFOREUSED, after $AFTERUSED"
+ (( $AFTERUSED > $BEFOREUSED + $(fs_log_size) )) &&
+ error "after $AFTERUSED > before $BEFOREUSED"
+ return 0
}
run_test 20b "write, unlink, eviction, replay, (test mds_cleanup_orphans)"
#b=2477,2532
test_40(){
- $LCTL mark multiop $MOUNT/$tfile OS_c
- multiop $MOUNT/$tfile OS_c &
- PID=$!
- writeme -s $MOUNT/${tfile}-2 &
- WRITE_PID=$!
- sleep 1
- facet_failover $SINGLEMDS
+ # always need connection to MDS to verify layout during IO. LU-2628.
+ lctl get_param mdc.*.connect_flags | grep -q layout_lock &&
+ skip "layout_lock needs MDS connection for IO" && return 0
+
+ $LCTL mark multiop $MOUNT/$tfile OS_c
+ multiop $MOUNT/$tfile OS_c &
+ PID=$!
+ writeme -s $MOUNT/${tfile}-2 &
+ WRITE_PID=$!
+ sleep 1
+ facet_failover $SINGLEMDS
#define OBD_FAIL_MDS_CONNECT_NET 0x117
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000117"
- kill -USR1 $PID
- stat1=`count_ost_writes`
- sleep $TIMEOUT
- stat2=`count_ost_writes`
- echo "$stat1, $stat2"
- if [ $stat1 -lt $stat2 ]; then
- echo "writes continuing during recovery"
- RC=0
- else
- echo "writes not continuing during recovery, bug 2477"
- RC=4
- fi
- echo "waiting for writeme $WRITE_PID"
- kill $WRITE_PID
- wait $WRITE_PID
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000117"
+ kill -USR1 $PID
+ stat1=`count_ost_writes`
+ sleep $TIMEOUT
+ stat2=`count_ost_writes`
+ echo "$stat1, $stat2"
+ if [ $stat1 -lt $stat2 ]; then
+ echo "writes continuing during recovery"
+ RC=0
+ else
+ echo "writes not continuing during recovery, bug 2477"
+ RC=4
+ fi
+ echo "waiting for writeme $WRITE_PID"
+ kill $WRITE_PID
+ wait $WRITE_PID
- echo "waiting for multiop $PID"
- wait $PID || return 2
- do_facet client munlink $MOUNT/$tfile || return 3
- do_facet client munlink $MOUNT/${tfile}-2 || return 3
- return $RC
+ echo "waiting for multiop $PID"
+ wait $PID || return 2
+ do_facet client munlink $MOUNT/$tfile || return 3
+ do_facet client munlink $MOUNT/${tfile}-2 || return 3
+ return $RC
}
run_test 40 "cause recovery in ptlrpc, ensure IO continues"
test_44a() { # was test_44
local at_max_saved=0
- local mdcdev=$($LCTL get_param -n devices |
- awk "/ ${FSNAME}-MDT0000-mdc-/ {print \$1}")
+ local mdcdev=$($LCTL dl |
+ awk "/${FSNAME}-MDT0000-mdc-/ {if (\$2 == \"UP\") {print \$1}}")
[ "$mdcdev" ] || return 2
[ $(echo $mdcdev | wc -w) -eq 1 ] ||
{ echo mdcdev=$mdcdev; $LCTL dl; return 3; }
for i in `seq 1 10`; do
echo "$i of 10 ($(date +%s))"
- do_facet $SINGLEMDS "lctl get_param -n mdt.*.mdt.timeouts | grep service"
+ do_facet $SINGLEMDS \
+ "lctl get_param -n md[ts].*.mdt.timeouts | grep service"
#define OBD_FAIL_TGT_CONN_RACE 0x701
do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000701"
# lctl below may fail, it is valid case
run_test 44a "race in target handle connect"
test_44b() {
- local mdcdev=$($LCTL get_param -n devices |
- awk "/ ${FSNAME}-MDT0000-mdc-/ {print \$1}")
+ local mdcdev=$($LCTL dl |
+ awk "/${FSNAME}-MDT0000-mdc-/ {if (\$2 == \"UP\") {print \$1}}")
[ "$mdcdev" ] || return 2
[ $(echo $mdcdev | wc -w) -eq 1 ] ||
{ echo mdcdev=$mdcdev; $LCTL dl; return 3; }
for i in `seq 1 10`; do
echo "$i of 10 ($(date +%s))"
- do_facet $SINGLEMDS "lctl get_param -n mdt.*.mdt.timeouts | grep service"
+ do_facet $SINGLEMDS \
+ "lctl get_param -n md[ts].*.mdt.timeouts | grep service"
#define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000704"
# lctl below may fail, it is valid case
run_test 44b "race in target handle connect"
test_44c() {
- replay_barrier $SINGLEMDS
- createmany -m $DIR/$tfile-%d 100
+	# Create 100 entries after the replay barrier; "createmany -m" makes
+	# files (mknod), so the failure message talks about files.
+	replay_barrier $SINGLEMDS
+	createmany -m $DIR/$tfile-%d 100 || error "failed to create files"
#define OBD_FAIL_TGT_RCVG_FLAG 0x712
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000712"
- fail_abort $SINGLEMDS
- unlinkmany $DIR/$tfile-%d 100 && return 1
- fail $SINGLEMDS
- unlinkmany $DIR/$tfile-%d 100 && return 1
- return 0
+	do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000712"
+	fail_abort $SINGLEMDS
+	# unlinkmany is expected to FAIL here and after the next failover;
+	# a successful unlink is reported as a test error.
+	unlinkmany $DIR/$tfile-%d 100 && error "unlinked after fail abort"
+	fail $SINGLEMDS
+	unlinkmany $DIR/$tfile-%d 100 && error "unlinked after fail"
+	return 0
}
run_test 44c "race in target handle connect"
# b3764 timed out lock replay
test_52() {
- touch $DIR/$tfile
- cancel_lru_locks mdc
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.90) ] &&
+		skip "MDS prior to 2.6.90 handle LDLM_REPLY_NET incorrectly" &&
+		return 0
- multiop $DIR/$tfile s || return 1
- replay_barrier $SINGLEMDS
-#define OBD_FAIL_LDLM_REPLY 0x30c
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000030c"
- fail $SINGLEMDS || return 2
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
+	# Keep the helper variables out of the shared test-suite namespace.
+	local mpid
+	local rc=0
+
+	touch $DIR/$tfile
+	cancel_lru_locks mdc
+
+	multiop_bg_pause $DIR/$tfile s_s || return 1
+	mpid=$!
- $CHECKSTAT -t file $DIR/$tfile-* && return 3 || true
+	#define OBD_FAIL_MDS_LDLM_REPLY_NET	0x157
+	lctl set_param -n ldlm.cancel_unused_locks_before_replay "0"
+	do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000157"
+
+	# Record a failure instead of returning immediately, so the fail_loc
+	# and ldlm settings changed above are always put back before the test
+	# exits. Return codes are unchanged: 2 = failover failed,
+	# 3 = multiop exited with an error.
+	if fail $SINGLEMDS; then
+		kill -USR1 $mpid
+		wait $mpid || rc=3
+	else
+		rc=2
+	fi
+
+	do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
+	lctl set_param fail_loc=0x0
+	lctl set_param -n ldlm.cancel_unused_locks_before_replay "1"
+	[ $rc -eq 0 ] || return $rc
+	rm -f $DIR/$tfile
}
run_test 52 "time out lock replay (3764)"
}
run_test 57 "test recovery from llog for setattr op"
+# Shared cleanup for the test_58* cases: unmount $MOUNT2 on this host and
+# remove the EXIT trap again.
+cleanup_58() {
+ zconf_umount `hostname` $MOUNT2
+ trap - EXIT
+}
+
#recovery many mds-ost setattr from llog
test_58a() {
mkdir -p $DIR/$tdir
local orig
local new
+ trap cleanup_58 EXIT
+
large_xattr_enabled &&
orig="$(generate_string $(max_xattr_size))" || orig="bar"
[[ "$new" = "$orig" ]] || return 1
rm -f $DIR/$tdir/$tfile
rmdir $DIR/$tdir
- zconf_umount `hostname` $MOUNT2
+ cleanup_58
}
run_test 58b "test replay of setxattr op"
local orig1
local new
+ trap cleanup_58 EXIT
+
if large_xattr_enabled; then
local xattr_size=$(max_xattr_size)
orig="$(generate_string $((xattr_size / 2)))"
[[ "$new" = "$orig1" ]] || return 4
rm -f $DIR/$tdir/$tfile
rmdir $DIR/$tdir
- zconf_umount $HOSTNAME $MOUNT2
+ cleanup_58
}
run_test 58c "resend/reconstruct setxattr op"
# OBD_FAIL_OBD_LLOG_SETUP 0x605
stop mgs
do_facet mgs "lctl set_param fail_loc=0x80000605"
- start mgs $MGSDEV $MGS_MOUNT_OPTS && error "mgs start should have failed"
+ start mgs $(mgsdevname) $MGS_MOUNT_OPTS &&
+ error "mgs start should have failed"
do_facet mgs "lctl set_param fail_loc=0"
- start mgs $MGSDEV $MGS_MOUNT_OPTS || error "cannot restart mgs"
+ start mgs $(mgsdevname) $MGS_MOUNT_OPTS || error "cannot restart mgs"
}
run_test 61d "error in llog_setup should cleanup the llog context correctly"
CONN2=$(lctl get_param -n osc.*.stats | awk '/_connect/ {total+=$2} END {print total}')
ATTEMPTS=$(($CONN2 - $CONN1))
echo "$ATTEMPTS osc reconnect attempts on gradual slow"
- [ $ATTEMPTS -gt 0 ] && error_ignore 13721 "AT should have prevented reconnect"
- return 0
+ [ $ATTEMPTS -gt 0 ] &&
+ error_ignore bz13721 "AT should have prevented reconnect"
+ return 0
}
run_test 67a "AT: verify slow request processing doesn't induce reconnects"
# set duration to 900 because it takes some time to boot node
[ "$FAILURE_MODE" = HARD ] && duration=900
+ local elapsed
local start_ts=$(date +%s)
local cmd="rundbench 1 -t $duration"
local pid=""
DBENCH_LIB=$DBENCH_LIB TESTSUITE=$TESTSUITE TESTNAME=$TESTNAME \
MOUNT=$MOUNT DIR=$DIR/$tdir/\\\$(hostname) LCTL=$LCTL $cmd" &
pid=$!
+
+ #LU-1897 wait for all dbench copies to start
+ while ! check_for_process $clients dbench; do
+ elapsed=$(($(date +%s) - start_ts))
+ if [ $elapsed -gt $duration ]; then
+ killall_process $clients dbench
+ error "dbench failed to start on $clients!"
+ fi
+ sleep 1
+ done
+
log "Started rundbench load pid=$pid ..."
- # give rundbench a chance to start, bug 24118
- sleep 12
- local elapsed=$(($(date +%s) - start_ts))
+ elapsed=$(($(date +%s) - start_ts))
local num_failovers=0
while [ $elapsed -lt $duration ]; do
if ! check_for_process $clients dbench; then
- error_noexit "dbench not running on some of $clients!"
+ error_noexit "dbench stopped on some of $clients!"
killall_process $clients dbench
break
fi
}
run_test 73b "open(O_CREAT), unlink, replay, reconnect at open_replay reply, close"
-test_73c() {
- multiop_bg_pause $DIR/$tfile O_tSc || return 3
- pid=$!
- rm -f $DIR/$tfile
-
- replay_barrier $SINGLEMDS
-#define OBD_FAIL_TGT_LAST_REPLAY 0x710
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000710"
- fail $SINGLEMDS
- kill -USR1 $pid
- wait $pid || return 1
- [ -e $DIR/$tfile ] && return 2
- return 0
-}
-run_test 73c "open(O_CREAT), unlink, replay, reconnect at last_replay, close"
-
# bug 18554
test_74() {
local clients=${CLIENTS:-$HOSTNAME}
local remote_dir=$DIR/$tdir/remote_dir
mkdir -p $DIR/$tdir
- # OBD_FAIL_UPDATE_OBJ_NET 0x1500
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x1500
+ #define OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
- fail mds$((MDTIDX + 1))
+ fail mds${MDTIDX}
wait $CLIENT_PID || error "remote creation failed"
return 0
}
-run_test 80a "DNE: create remote dir, drop update rep from MDT1, fail MDT1"
+run_test 80a "DNE: create remote dir, drop update rep from MDT0, fail MDT0"
test_80b() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
local remote_dir=$DIR/$tdir/remote_dir
mkdir -p $DIR/$tdir
- # OBD_FAIL_UPDATE_OBJ_NET 0x1500
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x1500
+ #define OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
- fail mds${MDTIDX}
+ fail mds$((MDTIDX + 1))
wait $CLIENT_PID || error "remote creation failed"
return 0
}
-run_test 80b "DNE: create remote dir, drop update rep from MDT1, fail MDT0"
+run_test 80b "DNE: create remote dir, drop update rep from MDT0, fail MDT1"
test_80c() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
local remote_dir=$DIR/$tdir/remote_dir
mkdir -p $DIR/$tdir
- # OBD_FAIL_UPDATE_OBJ_NET 0x1500
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x1500
+ #define OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
fail mds${MDTIDX}
fail mds$((MDTIDX + 1))
local remote_dir=$DIR/$tdir/remote_dir
mkdir -p $DIR/$tdir
- # OBD_FAIL_UPDATE_OBJ_NET 0x1500
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x1500
+ #define OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
+ # sleep 3 seconds to make sure MDTs are failed after
+ # lfs mkdir -i has finished on all of MDTs.
+ sleep 3
fail mds${MDTIDX},mds$((MDTIDX + 1))
wait $CLIENT_PID || error "remote creation failed"
mkdir -p $DIR/$tdir
# OBD_FAIL_MDS_REINT_NET_REP 0x119
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
+
+ # sleep 3 seconds to make sure MDTs are failed after
+ # lfs mkdir -i has finished on all of MDTs.
+ sleep 3
fail mds${MDTIDX}
return 0
}
-run_test 80e "DNE: create remote dir, drop MDT0 rep, fail MDT0"
+run_test 80e "DNE: create remote dir, drop MDT1 rep, fail MDT0"
test_80f() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
mkdir -p $DIR/$tdir
# OBD_FAIL_MDS_REINT_NET_REP 0x119
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds$((MDTIDX + 1))
return 0
}
-run_test 80f "DNE: create remote dir, drop MDT0 rep, fail MDT1"
+run_test 80f "DNE: create remote dir, drop MDT1 rep, fail MDT1"
test_80g() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
mkdir -p $DIR/$tdir
# OBD_FAIL_MDS_REINT_NET_REP 0x119
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
+
+ # sleep 3 seconds to make sure MDTs are failed after
+ # lfs mkdir -i has finished on all of MDTs.
+ sleep 3
fail mds${MDTIDX}
fail mds$((MDTIDX + 1))
return 0
}
-run_test 80g "DNE: create remote dir, drop MDT0 rep, fail MDT0, then MDT1"
+run_test 80g "DNE: create remote dir, drop MDT1 rep, fail MDT0, then MDT1"
test_80h() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
mkdir -p $DIR/$tdir
# OBD_FAIL_MDS_REINT_NET_REP 0x119
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
+
+ # sleep 3 seconds to make sure MDTs are failed after
+ # lfs mkdir -i has finished on all of MDTs.
+ sleep 3
fail mds${MDTIDX},mds$((MDTIDX + 1))
return 0
}
-run_test 80h "DNE: create remote dir, drop MDT0 rep, fail 2 MDTs"
+run_test 80h "DNE: create remote dir, drop MDT1 rep, fail 2 MDTs"
test_81a() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
mkdir -p $DIR/$tdir
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
- # OBD_FAIL_UPDATE_OBJ_NET 0x1500
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x1500
+ touch $remote_dir
+ # OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds$((MDTIDX + 1))
mkdir -p $DIR/$tdir
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
- # OBD_FAIL_UPDATE_OBJ_NET 0x1500
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x1500
+ # OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds${MDTIDX}
mkdir -p $DIR/$tdir
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
- # OBD_FAIL_UPDATE_OBJ_NET 0x1500
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x1500
+ # OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds${MDTIDX}
fail mds$((MDTIDX + 1))
mkdir -p $DIR/$tdir
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
- # OBD_FAIL_UPDATE_OBJ_NET 0x1500
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x1500
+ # OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds${MDTIDX},mds$((MDTIDX + 1))
do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
fail mds$((MDTIDX + 1))
do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
fail mds${MDTIDX}
fail mds$((MDTIDX + 1))
do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
fail mds${MDTIDX},mds$((MDTIDX + 1))
}
run_test 81h "DNE: unlink remote dir, drop request reply, fail 2 MDTs"
-test_83a() {
- mkdir -p $DIR/$tdir
- createmany -o $DIR/$tdir/$tfile- 10 || return 1
-#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000140"
- unlinkmany $DIR/$tdir/$tfile- 10 || return 2
-}
-run_test 83a "fail log_add during unlink recovery"
-
-test_83b() {
- mkdir -p $DIR/$tdir
- createmany -o $DIR/$tdir/$tfile- 10 || return 1
- replay_barrier $SINGLEMDS
- unlinkmany $DIR/$tdir/$tfile- 10 || return 2
-#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000140"
- fail $SINGLEMDS
-}
-run_test 83b "fail log_add during unlink recovery"
-
test_84a() {
#define OBD_FAIL_MDS_OPEN_WAIT_CREATE 0x144
do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000144"
dd if=/dev/urandom of=$DIR/$tdir/f-$file_id bs=4096 count=128
done
- # if the objids were not recreated, then "ls" will failed for -ENOENT
+ # if the objids were not recreated, then "ls" will fail with -ENOENT
ls -l $DIR/$tdir/* || error "can't get the status of precreated files"
local file_id
local uuid=$(ostuuid_from_index $i)
for file in f$i all; do
if [[ $dir/$file != $($LFS find --obd $uuid --name $file $dir) ]]; then
- $GETSTRIPE $dir/file
- error wrong stripe: $file, uuid: $uuid
+ $GETSTRIPE $dir/$file
+ error wrong stripe: $file, uuid: $uuid
fi
done
done
# Before failing an OST, get its obd name and index
local varsvc=${ostfail}_svc
local obd=$(do_facet $ostfail lctl get_param -n obdfilter.${!varsvc}.uuid)
- local index=${obd:(-6):1}
+ local index=$(($(facet_number $ostfail) - 1))
echo "Fail $ostfail $obd, display the list of affected files"
shutdown_facet $ostfail || return 2
}
run_test 90 "lfs find identifies the missing striped file segments"
+# Replay + reconnect: write a small single-stripe file, set fail_val and
+# fail_loc 0x715 on ost1, then fail ost1.
+test_93() {
+ local server_version=$(lustre_version_code $SINGLEMDS)
+ # Run only on 2.6.90 or newer, or on the 2.5.4 .. 2.5.49 range.
+ [[ $server_version -ge $(version_code 2.6.90) ]] ||
+ [[ $server_version -ge $(version_code 2.5.4) &&
+ $server_version -lt $(version_code 2.5.50) ]] ||
+ { skip "Need MDS version 2.5.4+ or 2.6.90+"; return; }
+
+ cancel_lru_locks osc
+
+ $SETSTRIPE -i 0 -c 1 $DIR/$tfile
+ dd if=/dev/zero of=$DIR/$tfile bs=1024 count=1
+#define OBD_FAIL_TGT_REPLAY_RECONNECT 0x715
+ # Emulate the state in which the OST is still waiting for other clients
+ # to finish recovery: the final ping is queued, but its reply is only
+ # sent once recovery completes. This is done by sleeping before the
+ # final pings are processed.
+ do_facet ost1 "$LCTL set_param fail_val=40"
+ do_facet ost1 "$LCTL set_param fail_loc=0x715"
+ fail ost1
+}
+run_test 93 "replay + reconnect"
+
+# Helper for the test_100* cases: check that $DIR/$tdir/striped_dir reports a
+# stripe count of 2 and that 20 files can be created inside it. Calls error
+# if either check fails.
+striped_dir_check_100() {
+ local striped_dir=$DIR/$tdir/striped_dir
+ local stripe_count=$($LFS getdirstripe -c $striped_dir)
+
+ # Print the full directory layout to the test log.
+ $LFS getdirstripe $striped_dir
+ [ $stripe_count -eq 2 ] || error "$stripe_count != 2"
+
+ createmany -o $striped_dir/f-%d 20 ||
+ error "creation failed under striped dir"
+}
+
+# DNE: create a 2-stripe directory while fail_loc 0x1701 is set on mds2,
+# fail mds2 while the create is in flight, then check that the create
+# succeeded and the striped directory passes striped_dir_check_100.
+# Skipped with fewer than 2 MDTs, or when FAILURE_MODE is HARD and mds1 and
+# mds2 share a host.
+test_100a() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	# FAILURE_MODE is quoted so an unset or empty value is a plain
+	# "no match" rather than a "[: unary operator expected" error.
+	([ "$FAILURE_MODE" == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	local striped_dir=$DIR/$tdir/striped_dir
+	local MDTIDX=1
+
+	mkdir $DIR/$tdir
+
+	#To make sure MDT1 and MDT0 are connected
+	#otherwise it may create single stripe dir here
+	$LFS setdirstripe -i1 $DIR/$tdir/remote_dir
+
+	#define OBD_FAIL_OUT_UPDATE_NET_REP	0x1701
+	do_facet mds$((MDTIDX+1)) lctl set_param fail_loc=0x1701
+	$LFS setdirstripe -i0 -c2 $striped_dir &
+	local CLIENT_PID=$!
+
+	fail mds$((MDTIDX + 1))
+
+	wait $CLIENT_PID || error "striped dir creation failed"
+
+	striped_dir_check_100 || error "striped dir check failed"
+	rm -rf $DIR/$tdir || error "rmdir failed"
+}
+run_test 100a "DNE: create striped dir, drop update rep from MDT1, fail MDT1"
+
+# DNE: create a 2-stripe directory while fail_loc 0x119 is set on mds1,
+# fail mds1 while the create is in flight, then check that the create
+# succeeded and the striped directory passes striped_dir_check_100.
+# Skipped with fewer than 2 MDTs, or when FAILURE_MODE is HARD and mds1 and
+# mds2 share a host.
+test_100b() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	# FAILURE_MODE is quoted so an unset or empty value is a plain
+	# "no match" rather than a "[: unary operator expected" error.
+	([ "$FAILURE_MODE" == "HARD" ] &&
+		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+		return 0
+
+	local striped_dir=$DIR/$tdir/striped_dir
+	local MDTIDX=1
+
+	mkdir $DIR/$tdir
+
+	#To make sure MDT1 and MDT0 are connected
+	#otherwise it may create single stripe dir here
+	$LFS setdirstripe -i1 $DIR/$tdir/remote_dir
+
+	# OBD_FAIL_MDS_REINT_NET_REP 0x119
+	do_facet mds$MDTIDX lctl set_param fail_loc=0x119
+	$LFS mkdir -i0 -c2 $striped_dir &
+	local CLIENT_PID=$!
+
+	fail mds$MDTIDX
+
+	wait $CLIENT_PID || error "striped dir creation failed"
+
+	striped_dir_check_100 || error "striped dir check failed"
+	rm -rf $DIR/$tdir || error "rmdir failed"
+}
+run_test 100b "DNE: create striped dir, fail MDT0"
+
+# Write $num non-empty files behind a replay barrier, abort recovery, then
+# create $num new files in a second directory and fail if any of the newly
+# touched files has a non-zero size.
+test_101() { #LU-5648
+	# Keep the counter and loop variable local so they do not leak into
+	# the rest of the test suite.
+	local num=1000
+	local i
+
+	mkdir -p $DIR/$tdir/d1
+	mkdir -p $DIR/$tdir/d2
+	touch $DIR/$tdir/file0
+
+	replay_barrier $SINGLEMDS
+	for i in $(seq $num) ; do
+		echo test$i > $DIR/$tdir/d1/file$i
+	done
+
+	fail_abort $SINGLEMDS
+	for i in $(seq $num) ; do
+		touch $DIR/$tdir/d2/file$i
+		# A freshly touched file must be empty; list it for the log
+		# before reporting the error.
+		test -s $DIR/$tdir/d2/file$i &&
+			ls -al $DIR/$tdir/d2/file$i && error "file$i's size > 0"
+	done
+
+	rm -rf $DIR/$tdir
+}
+run_test 101 "Shouldn't reassign precreated objs to other files after recovery"
+
complete $SECONDS
check_and_cleanup_lustre
exit_status