[ ! -f "$LCTL" ] && export LCTL=$(which lctl)
export LFS=${LFS:-"$LUSTRE/utils/lfs"}
[ ! -f "$LFS" ] && export LFS=$(which lfs)
+ export KSOCKLND_CONFIG=${KSOCKLND_CONFIG:-"$LUSTRE/scripts/ksocklnd-config"}
+ [ ! -f "$KSOCKLND_CONFIG" ] &&
+ export KSOCKLND_CONFIG=$(which ksocklnd-config 2> /dev/null)
export PERM_CMD=${PERM_CMD:-"$LCTL conf_param"}
fi
load_module ../lnet/klnds/$LNETLND
load_module obdclass/obdclass
+ MODOPTS_PTLRPC=${MODOPTS_PTLRPC:-"lbug_on_grant_miscount=1"}
load_module ptlrpc/ptlrpc
load_module ptlrpc/gss/ptlrpc_gss
load_module fld/fld
local size=0
case $(facet_fstype $facet) in
- ldiskfs) size=50;; # largest seen is 44, leave some headroom
+ ldiskfs) size=72;; # largest seen is 64, leave some headroom
# grant_block_size is in bytes, allow at least 2x max blocksize
zfs) size=$(lctl get_param osc.$FSNAME*.import |
awk '/grant_block_size:/ {print $2/512; exit;}')
local device=${2:-$FSNAME-OST*}
local name=$3
- do_nodes $nodes "$LCTL get_param -n obdfilter.$device.$name \
- osd-*.$device.$name 2>&1" | grep -v 'error:'
+ do_nodes $nodes "$LCTL get_param -n osd-*.$device.$name"
}
set_osd_param() {
local name=$3
local value=$4
- do_nodes $nodes "$LCTL set_param -n obdfilter.$device.$name=$value \
- osd-*.$device.$name=$value 2>&1" | grep -v 'error:'
+ do_nodes $nodes "$LCTL set_param -n osd-*.$device.$name=$value"
}
set_debug_size () {
reboot_facet() {
local facet=$1
local node=$(facet_active_host $facet)
+ local sleep_time=${2:-10} # optional $2: seconds to sleep when boot_node is not called (default 10)
if [ "$FAILURE_MODE" = HARD ]; then
boot_node $node
else
- sleep 10
+ sleep $sleep_time # FAILURE_MODE other than HARD: only wait, for the caller-chosen time
fi
}
lfs_df_check
}
+# Refresh the statfs data that the MDTs cache about each other.
+# Returns immediately when only one MDT is configured (MDSCOUNT == 1).
+all_mds_up() {
+	(( MDSCOUNT == 1 )) && return
+
+	# wait so that the statfs data cached on the MDT expires; the
+	# parameter glob can match more than one osp device, so use the
+	# largest maxage instead of letting sleep add all the values up
+	local delay
+	delay=$(do_facet $SINGLEMDS $LCTL \
+		get_param -n osp.*MDT0000*MDT0001.maxage | sort -n | tail -1)
+	if [[ -n "$delay" ]]; then
+		sleep $delay
+	else
+		# "sleep" without an operand would only print a usage error
+		echo "all_mds_up: cannot read osp maxage, not waiting" >&2
+	fi
+
+	local nodes=$(comma_list $(mdts_nodes))
+	# initiate statfs RPC, all to all MDTs
+	# NOTE(review): the query is issued twice; presumably the first read
+	# triggers the RPC and the second one picks up the result - confirm
+	do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null
+	do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null
+}
+
client_up() {
# usually checked on particular client or locally
sleep 1
facet=$(echo ${affecteds[index]} | tr -s " " | cut -d"," -f 1)
echo reboot facets: ${affecteds[index]}
- reboot_facet $facet
+ reboot_facet $facet $sleep_time
change_active ${affecteds[index]}
mount_facet $facet -o $abort_type
clients_up || echo "first stat failed: $?"
clients_up || error "post-failover stat: $?"
+ all_mds_up
}
host_nids_address() {
# General functions
wait_for_function () {
- local quiet=""
+ local quiet="" # redirection appended to the eval'd command; stays empty without --quiet
- # suppress fn both stderr and stdout
- if [ "$1" = "--quiet" ]; then
- shift
- quiet=" > /dev/null 2>&1"
-
- fi
+ # --quiet as first argument: discard both stdout and stderr of fn
+ if [ "$1" = "--quiet" ]; then
+ shift
+ quiet=" > /dev/null 2>&1"
+ fi
- local fn=$1
- local max=${2:-900}
- local sleep=${3:-5}
+ local fn=$1 # command string, run through eval on every attempt
+ local max=${2:-900} # total time budget in seconds (default 900)
+ local sleep=${3:-5} # pause between attempts in seconds (default 5)
- local wait=0
+ local wait=0 # seconds of the budget accounted for so far
- while true; do
+ while true; do
- eval $fn $quiet && return 0
+ eval $fn $quiet && return 0 # return 0 as soon as fn succeeds
- wait=$((wait + sleep))
- [ $wait -lt $max ] || return 1
- echo waiting $fn, $((max - wait)) secs left ...
- sleep $sleep
- done
+ [ $wait -lt $max ] || return 1 # budget used up: give up with status 1
+ echo waiting $fn, $((max - wait)) secs left ...
+ wait=$((wait + sleep))
+ [ $wait -gt $max ] && ((sleep -= wait - max)) # shorten the final pause so the total does not exceed max
+ sleep $sleep
+ done
}
check_network() {
# sync all the data and make sure no pending data on server
do_nodes $clients sync
- clients_up # initiate all idling connections
+ do_nodes $clients $LFS df # initiate all idling connections
# get client grant
cli_grant=$(grant_from_clients $clients)
}
ostname_from_index() {
- local uuid=$(ostuuid_from_index $1)
+ local uuid=$(ostuuid_from_index $1 $2) # $1 and $2 are handed to ostuuid_from_index as given; "_UUID" is stripped from the result below
echo ${uuid/_UUID/}
}
}
_wait_import_state () {
- local expected=$1
- local CONN_PROC=$2
- local maxtime=${3:-$(max_recovery_time)}
- local error_on_failure=${4:-1}
- local CONN_STATE
- local i=0
+ local expected="$1"
+ local CONN_PROC="$2"
+ local maxtime=${3:-$(max_recovery_time)}
+ local err_on_fail=${4:-1}
+ local CONN_STATE
+ local i=0
CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq)
- while ! echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do
- if [ "${expected}" == "DISCONN" ]; then
- # for disconn we can check after proc entry is removed
- [ "x${CONN_STATE}" == "x" ] && return 0
- # with AT enabled, we can have connect request timeout near of
- # reconnect timeout and test can't see real disconnect
- [ "${CONN_STATE}" == "CONNECTING" ] && return 0
- fi
- if [ $i -ge $maxtime ]; then
- [ $error_on_failure -ne 0 ] && \
- error "can't put import for $CONN_PROC into ${expected}" \
- "state after $i sec, have ${CONN_STATE}"
- return 1
- fi
- sleep 1
- # Add uniq for multi-mount case
- CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq)
- i=$(($i + 1))
- done
+ while ! echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do
+ if [[ "${expected}" == "DISCONN" ]]; then
+ # for disconn we can check after proc entry is removed
+ [[ -z "${CONN_STATE}" ]] && return 0
+ # with AT, we can have connect request timeout near
+ # reconnect timeout and test can't see real disconnect
+ [[ "${CONN_STATE}" == "CONNECTING" ]] && return 0
+ fi
+ if (( $i >= $maxtime )); then
+ (( $err_on_fail != 0 )) &&
+ error "can't put import for $CONN_PROC into ${expected} state after $i sec, have ${CONN_STATE}"
+ return 1
+ fi
+ sleep 1
+ # Add uniq for multi-mount case
+ CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null |
+ cut -f2 | uniq)
+ i=$((i + 1))
+ done
- log "$CONN_PROC in ${CONN_STATE} state after $i sec"
- return 0
+ log "$CONN_PROC in ${CONN_STATE} state after $i sec"
+ return 0
}
wait_import_state() {
- local state=$1
- local params=$2
- local maxtime=${3:-$(max_recovery_time)}
- local error_on_failure=${4:-1}
- local param
-
- for param in ${params//,/ }; do
- _wait_import_state $state $param $maxtime $error_on_failure || return
- done
+ local expected="$1" # import state to wait for, passed on to _wait_import_state
+ local params="$2" # comma-separated list of import parameter names
+ local maxtime=${3:-$(max_recovery_time)} # default: output of max_recovery_time
+ local err_on_fail=${4:-1} # passed on unchanged as the 4th argument (default 1)
+ local param
+
+ for param in ${params//,/ }; do # commas become spaces, then word splitting yields one name per pass
+ _wait_import_state "$expected" "$param" $maxtime $err_on_fail ||
+ return # stop at the first parameter that fails, keeping its status
+ done
}
wait_import_state_mount() {
return 0
fi
- wait_import_state $*
+ wait_import_state "$@"
}
# One client request could be timed out because server was not ready
}
wait_clients_import_state () {
- local list=$1
- local facet=$2
- local expected=$3
-
- local facets=$facet
+ local list="$1"
+ local facet="$2"
+ local expected="$3"
+ local facets="$facet"
if [ "$FAILURE_MODE" = HARD ]; then
facets=$(facets_on_host $(facet_active_host $facet))
local proc_path
case $facet in
ost* ) proc_path="osc.$(get_clientosc_proc_path \
- $label).ost_server_uuid" ;;
+ $label).ost_server_uuid" ;;
mds* ) proc_path="mdc.$(get_clientmdc_proc_path \
- $label).mds_server_uuid" ;;
+ $label).mds_server_uuid" ;;
mgs* ) proc_path="mgc.$(get_clientmgc_proc_path \
- $label).mgs_server_uuid" ;;
+ $label).mgs_server_uuid" ;;
*) error "unknown facet!" ;;
esac
# ldiskfs xattrs over one block in size. Allow both the historical
# Lustre feature name (large_xattr) and the upstream name (ea_inode).
large_xattr_enabled() {
- [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 1
+ [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 0
local mds_dev=$(mdsdevname ${SINGLEMDS//mds/})
fi
echo "waited $((i - 1)) seconds for sync"
}
+
+consume_precreations() {
+ local dir=$1
+ local mfacet=$2
+ local OSTIDX=$3
+ local extra=${4:-2}
+ local OST=$(ostname_from_index $OSTIDX $dir)
+
+ test_mkdir -p $dir/${OST}
+ $LFS setstripe -i $OSTIDX -c 1 ${dir}/${OST}
+
+ # on the mdt's osc
+ local mdtosc_proc=$(get_mdtosc_proc_path $mfacet $OST)
+ local last_id=$(do_facet $mfacet $LCTL get_param -n \
+ osp.$mdtosc_proc.prealloc_last_id)
+ local next_id=$(do_facet $mfacet $LCTL get_param -n \
+ osp.$mdtosc_proc.prealloc_next_id)
+ echo "Creating to objid $last_id on ost $OST..."
+ createmany -o $dir/${OST}/f $next_id $((last_id - next_id + extra))
+}
+
+__exhaust_precreations() {
+ local OSTIDX=$1
+ local FAILLOC=$2
+ local FAILIDX=${3:-$OSTIDX}
+ local ofacet=ost$((OSTIDX + 1))
+
+ mkdir_on_mdt0 $DIR/$tdir
+ local mdtidx=$($LFS getstripe -m $DIR/$tdir)
+ local mfacet=mds$((mdtidx + 1))
+ echo OSTIDX=$OSTIDX MDTIDX=$mdtidx
+
+ local mdtosc_proc=$(get_mdtosc_proc_path $mfacet)
+ do_facet $mfacet $LCTL get_param osp.$mdtosc_proc.prealloc*
+
+#define OBD_FAIL_OST_ENOSPC 0x215
+ do_facet $ofacet $LCTL set_param fail_val=$FAILIDX fail_loc=0x215
+
+ consume_precreations $DIR/$tdir $mfacet $OSTIDX
+
+ do_facet $mfacet $LCTL get_param osp.$mdtosc_proc.prealloc*
+ do_facet $ofacet $LCTL set_param fail_loc=$FAILLOC
+}
+
+# Run __exhaust_precreations with the caller's first three arguments
+# (OST index, fail_loc, optional fail_val), then call sleep_maxage.
+exhaust_precreations() {
+	local ost_index=$1
+	local fail_loc_val=$2
+	local fail_index=$3
+
+	__exhaust_precreations $ost_index $fail_loc_val $fail_index
+	sleep_maxage
+}
+
+# Run __exhaust_precreations for every OST index below OSTCOUNT, with
+# $1 as the fail_loc and -1 as the fail_val, then call sleep_maxage.
+exhaust_all_precreations() {
+	local i=0
+
+	while (( i < OSTCOUNT )); do
+		__exhaust_precreations $i $1 -1
+		i=$((i + 1))
+	done
+	sleep_maxage
+}