}
run_test 24 "fsync error (should return error)"
+# Poll until $facet reports one export fewer than $exports.
+# $1 facet whose ${facet}_svc variable names the service to query
+# $2 export count taken before the client was expected to be evicted
+# $3 optional maximum wait in seconds, handed to wait_update unchanged
+# Returns whatever wait_update returns.
+wait_client_evicted () {
+    local facet=$1
+    local exports=$2
+    local max_wait=$3
+    local svc_ref=${facet}_svc
+    local wanted=$((exports - 1))
+    # second space-separated field of num_exports for that service
+    local probe="lctl get_param -n *.${!svc_ref}.num_exports | cut -d' ' -f2"
+
+    wait_update $(facet_host $facet) "$probe" $wanted $max_wait
+}
+
test_26a() { # was test_26 bug 5921 - evict dead exports by pinger
# this test can only run from a client on a separate node.
remote_ost || { skip "local OST" && return 0; }
remote_ost_nodsh && skip "remote OST with nodsh" && return 0
remote_mds || { skip "local MDS" && return 0; }
- OST_FILE=obdfilter.${ost1_svc}.num_exports
- OST_EXP="`do_facet ost1 lctl get_param -n $OST_FILE`"
- OST_NEXP1=`echo $OST_EXP | cut -d' ' -f2`
- echo starting with $OST_NEXP1 OST exports
+ # give up unless check_timeout accepts the mds and client timeouts
+ check_timeout || return 1
+ # baseline: second field of ost1's num_exports, read before fail_loc is set
+ local OST_NEXP=$(do_facet ost1 lctl get_param -n obdfilter.${ost1_svc}.num_exports | cut -d' ' -f2)
+
+ echo starting with $OST_NEXP OST exports
# OBD_FAIL_PTLRPC_DROP_RPC 0x505
do_facet client lctl set_param fail_loc=0x505
# evictor takes up to 2.25x to evict. But if there's a
# race to start the evictor from various obds, the loser
# might have to wait for the next ping.
- echo Waiting for $(($TIMEOUT * 4)) secs
- sleep $(($TIMEOUT * 4))
- OST_EXP="`do_facet ost1 lctl get_param -n $OST_FILE`"
- OST_NEXP2=`echo $OST_EXP | cut -d' ' -f2`
- echo ending with $OST_NEXP2 OST exports
+ # poll for at most 2.75 * TIMEOUT seconds until ost1 shows one export fewer
+ local rc=0
+ wait_client_evicted ost1 $OST_NEXP $((TIMEOUT * 2 + TIMEOUT * 3 / 4))
+ rc=$? # saved so fail_loc is cleared before the result is reported
do_facet client lctl set_param fail_loc=0x0
- [ $OST_NEXP1 -le $OST_NEXP2 ] && error "client not evicted"
- return 0
+ [ $rc -eq 0 ] || error "client not evicted from OST"
}
run_test 26a "evict dead exports"
test_26b() { # bug 10140 - evict dead exports by pinger
remote_ost_nodsh && skip "remote OST with nodsh" && return 0
+ check_timeout || return 1
client_df
zconf_mount `hostname` $MOUNT2 || error "Failed to mount $MOUNT2"
- MDS_FILE=mds.${mds_svc}.num_exports
- MDS_NEXP1="`do_facet mds lctl get_param -n $MDS_FILE | cut -d' ' -f2`"
- OST_FILE=obdfilter.${ost1_svc}.num_exports
- OST_NEXP1="`do_facet ost1 lctl get_param -n $OST_FILE | cut -d' ' -f2`"
- echo starting with $OST_NEXP1 OST and $MDS_NEXP1 MDS exports
+ # baseline export counts (second field of num_exports) taken while $MOUNT2 is mounted
+ local MDS_NEXP=$(do_facet mds lctl get_param -n mds.${mds_svc}.num_exports | cut -d' ' -f2)
+ local OST_NEXP=$(do_facet ost1 lctl get_param -n obdfilter.${ost1_svc}.num_exports | cut -d' ' -f2)
+
+ echo starting with $OST_NEXP OST and $MDS_NEXP MDS exports
+
#force umount a client; exports should get evicted
zconf_umount `hostname` $MOUNT2 -f
+
# evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict.
- # But if there's a race to start the evictor from various obds,
- # the loser might have to wait for the next ping.
- echo Waiting for $(($TIMEOUT * 3)) secs
- sleep $(($TIMEOUT * 3))
- OST_NEXP2="`do_facet ost1 lctl get_param -n $OST_FILE | cut -d' ' -f2`"
- MDS_NEXP2="`do_facet mds lctl get_param -n $MDS_FILE | cut -d' ' -f2`"
- echo ending with $OST_NEXP2 OST and $MDS_NEXP2 MDS exports
- [ $OST_NEXP1 -le $OST_NEXP2 ] && error "client not evicted from OST"
- [ $MDS_NEXP1 -le $MDS_NEXP2 ] && error "client not evicted from MDS"
- return 0
+ # But if there's a race to start the evictor from various obds,
+ # the loser might have to wait for the next ping.
+ # PING_INTERVAL max(obd_timeout / 4, 1U)
+ # sleep (2*PING_INTERVAL)
+
+ # wait at most 2.75 * TIMEOUT seconds per server for the export count to
+ # drop by one; the error message is passed alone, with no stray arguments
+ wait_client_evicted ost1 $OST_NEXP $((TIMEOUT * 2 + TIMEOUT * 3 / 4)) || \
+ error "Client was not evicted by ost"
+ wait_client_evicted mds $MDS_NEXP $((TIMEOUT * 2 + TIMEOUT * 3 / 4)) || \
+ error "Client was not evicted by mds"
}
run_test 26b "evict dead exports"
return 0
}
+# Poll a command on a node until its output matches an expected value.
+# $1 node to run the command on (through do_node)
+# $2 command string to run
+# $3 expected output
+# $4 maximum time to wait in seconds (default 90)
+# Returns 0 once the output matches, 3 if it never did within the limit.
+wait_update () {
+    local node=$1
+    local TEST=$2
+    local FINAL=$3
+    local MAX=${4:-90}
+
+    local RESULT
+    local WAIT=0
+    local sleep=5
+
+    while true; do
+        # check before sleeping so an already-updated value returns at once
+        RESULT=$(do_node $node "$TEST")
+        # accept an exact string match or numeric equality; the operands are
+        # quoted and stderr is dropped so empty output is just "no match yet"
+        if [ "$RESULT" = "$FINAL" ] ||
+           [ "$RESULT" -eq "$FINAL" ] 2>/dev/null; then
+            echo "Updated after $WAIT sec: wanted $FINAL got $RESULT"
+            return 0
+        fi
+        # written as "-lt || break" so a malformed MAX ends the loop
+        [ $WAIT -lt $MAX ] || break
+        echo "Waiting $((MAX - WAIT)) secs for update"
+        sleep $sleep
+        WAIT=$((WAIT + sleep))
+    done
+    echo "Update not seen after $MAX sec: wanted $FINAL got $RESULT"
+    return 3
+}
+
+# Run wait_update against the host of a facet.
+# $1 facet name, translated to a host with facet_host
+# remaining arguments: test command, expected value and optional maximum
+# wait, forwarded to wait_update unchanged
+wait_update_facet () {
+    local facet=$1
+    # drop the facet name so it is not forwarded as wait_update's test command
+    shift
+    # "$@" keeps the multi-word test command as a single argument
+    wait_update $(facet_host $facet) "$@"
+}
+
wait_delete_completed () {
local TOTALPREV=`lctl get_param -n osc.*.kbytesavail | \
awk 'BEGIN{total=0}; {total+=$1}; END{print total}'`
}
wait_for_host() {
- HOST=$1
+ local HOST=$1 # node to wait for; local so no variable named HOST outside this function is overwritten
check_network "$HOST" 900
while ! do_node $HOST "ls -d $LUSTRE " > /dev/null; do sleep 5; done
}
wait_for() {
- facet=$1
- HOST=`facet_active_host $facet`
+ local facet=$1 # facet to wait for
+ local HOST=`facet_active_host $facet` # host reported by facet_active_host, handed to wait_for_host below
wait_for_host $HOST
}
#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2)
# as we are in process of changing obd_timeout in different ways
# let's set MAX longer than that
- MAX=$(( timeout * 4 ))
- WAIT=0
+ local MAX=$(( timeout * 4 ))
+ local WAIT=0
while [ $WAIT -lt $MAX ]; do
STATUS=`do_facet mds "lctl get_param -n mds.*-MDT*.recovery_status | grep status"`
echo $STATUS | grep COMPLETE && return 0
}
facet_failover() {
- facet=$1
- sleep_time=$2
+ local facet=$1
+ local sleep_time=$2
echo "Failing $facet on node `facet_active_host $facet`"
shutdown_facet $facet
[ -n "$sleep_time" ] && sleep $sleep_time
zconf_mount `hostname` $1 || error "mount failed"
}
-set_obd_timeout() {
- local facet=$1
- local timeout=$2
-
- do_facet $facet lsmod | grep -q obdclass || \
- do_facet $facet "modprobe obdclass"
-
- do_facet $facet "lctl set_param timeout=$timeout"
-}
-
writeconf_facet () {
local facet=$1
local dev=$2
echo $WRITECONF | grep -q "writeconf" && \
writeconf_all
- set_obd_timeout mds $TIMEOUT
start mds $MDSDEV $MDS_MOUNT_OPTS
# We started mds, now we should set failover variable properly.
# Set mdsfailover_HOST if it is not set (the default failnode).
for num in `seq $OSTCOUNT`; do
DEVNAME=`ostdevname $num`
- set_obd_timeout ost$num $TIMEOUT
start ost$num $DEVNAME $OST_MOUNT_OPTS
# We started ost$num, now we should set ost${num}failover variable properly.
[ -n "$CLIENTS" ] && zconf_mount_clients $CLIENTS $MOUNT2
fi
sleep 5
- init_versions_vars
+ init_param_vars
}
mounted_lustre_filesystems() {
done
}
-init_versions_vars () {
+init_param_vars () { # exports MDSVER/OSTVER/CLIVER and sets TIMEOUT from the mds
export MDSVER=$(do_facet mds "lctl get_param version" | cut -d. -f1,2)
export OSTVER=$(do_facet ost1 "lctl get_param version" | cut -d. -f1,2)
export CLIVER=$(lctl get_param version | cut -d. -f 1,2)
+ # take TIMEOUT from the value the mds reports for "lctl get_param -n timeout"
+ TIMEOUT=$(do_facet mds "lctl get_param -n timeout")
+ log "Using TIMEOUT=$TIMEOUT"
}
check_config () {
fi
}
+# Verify that the mds and the local client both run with the timeout in $TIMEOUT.
+# Calls error and returns 1 when a value differs or cannot be compared as a
+# number; otherwise returns 0.
+check_timeout () {
+    local mdstimeout=$(do_facet mds "lctl get_param -n timeout")
+    local cltimeout=$(lctl get_param -n timeout)
+
+    # operands are quoted and each test is negated, so an empty or
+    # non-numeric value counts as a mismatch instead of making the test
+    # command itself fail and the check pass silently
+    if ! [ "$mdstimeout" -eq "$TIMEOUT" ] 2>/dev/null ||
+       ! [ "$mdstimeout" -eq "$cltimeout" ] 2>/dev/null; then
+        error "timeouts are wrong! mds: $mdstimeout, client: $cltimeout, TIMEOUT=$TIMEOUT"
+        return 1
+    fi
+}
+
check_and_setup_lustre() {
local MOUNTED=$(mounted_lustre_filesystems)
if [ -z "$MOUNTED" ] || ! $(echo $MOUNTED | grep -w -q $MOUNT); then
else
check_config $MOUNT
init_facets_vars
- init_versions_vars
+ init_param_vars
fi
if [ "$ONLY" == "setup" ]; then
exit 0