rm -rf $dir || error "removing $dir"
mkdir -p $dir || error "creating $dir"
-
+
echo "mcreating $FILECOUNT files"
createmany -m $dir/f 1 $FILECOUNT || \
error "creating files in $dir"
# needs to be in /etc/groups on MDS, gid == uid
# Let's use RUNAS_ID
T67_UID=${T67_UID:-$RUNAS_ID}
-
+
[ "$UID" = "$T67_UID" ] && skip "UID = T67_UID = $UID -- skipping" && return
check_kernel_version 35 || return 0
do_facet mds grep -q ":$T67_UID:$T67_UID" /etc/passwd || \
chown $RUNAS_ID $DIR/d99cvsroot || error "chown $DIR/d99cvsroot failed"
local oldPWD=$PWD # bug 13584, use $TMP as working dir
cd $TMP
-
+
$RUNAS cvs -d $DIR/d99cvsroot init || error "cvs init failed"
cd $oldPWD
}
local testfile2=${testfile}2
local value=`getfattr -n trusted.lov $testfile 2> /dev/null | \
grep "trusted.lov" |sed -e 's/[^=]\+=//'`
-
+
$MCREATE $testfile2
- setfattr -n trusted.lov -v $value $testfile2
+ setfattr -n trusted.lov -v $value $testfile2
local tmp_file=${testfile}3
$GETSTRIPE -v $testfile2 > $tmp_file
local stripe_size=`grep "size" $tmp_file| awk '{print $2}'`
local testfile2=${testfile}2
local value=`getfattr -n lustre.lov $testfile 2> /dev/null | \
grep "lustre.lov" |sed -e 's/[^=]\+=//' `
-
+
$RUNAS $MCREATE $testfile2
- $RUNAS setfattr -n lustre.lov -v $value $testfile2
+ $RUNAS setfattr -n lustre.lov -v $value $testfile2
local tmp_file=${testfile}3
$RUNAS $GETSTRIPE -v $testfile2 > $tmp_file
local stripe_size=`grep "size" $tmp_file| awk '{print $2}'`
lfs df -i $DIR || error "lfs df -i $DIR failed"
lfs df $DIR/$tfile || error "lfs df $DIR/$tfile failed"
lfs df -ih $DIR/$tfile || error "lfs df -ih $DIR/$tfile failed"
-
+
OSC=`lctl get_param -n devices | awk '/-osc-|OSC.*MNT/ {print $4}' | head -n 1`
lctl --device %$OSC deactivate
lfs df || error "lfs df with deactivated OSC failed"
test_118a() #bug 11710
{
reset_async
-
+
multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c
DIRTY=$(lctl get_param -n "llite.*.dump_page_cache" | grep -c dirty)
WRITEBACK=$(lctl get_param "llite.*.dump_page_cache" | grep -c writeback)
# until a subsequent RPC completes successfully without error.
multiop $DIR/$tfile Ow4096yc
rm -f $DIR/$tfile
-
+
return 0
}
run_test 118b "Reclaim dirty pages on fatal error =========="
if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then
error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK"
fi
-
+
rm -f $DIR/$tfile
echo "Dirty pages flushed via fsync on EROFS"
return 0
#define OBD_FAIL_OST_BRW_PAUSE_BULK
set_nodes_failloc "$(osts_nodes)" 0x214
# multiop should block due to fsync until pages are written
- multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c &
+ multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c &
MULTIPID=$!
sleep 1
if [[ $RC -eq 0 ]]; then
error "Must return error due to dropped pages, rc=$RC"
fi
-
+
lctl set_param fail_loc=0x0
LOCKED=$(lctl get_param -n "llite.*.dump_page_cache" | grep -c locked)
# simulate local -ENOMEM
multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c
RC=$?
-
+
lctl set_param fail_loc=0
if [[ $RC -eq 0 ]]; then
error "Must return error due to dropped pages, rc=$RC"
if [[ $LOCKED -ne 0 ]]; then
error "Locked pages remain in cache, locked=$LOCKED"
fi
-
+
if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then
error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK"
fi
# Should simulate ENOMEM error which is recoverable and should be handled by timeout
multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c
RC=$?
-
+
set_nodes_failloc "$(osts_nodes)" 0
if [[ $RC -eq 0 ]]; then
error "Must return error due to dropped pages, rc=$RC"
if [[ $LOCKED -ne 0 ]]; then
error "Locked pages remain in cache, locked=$LOCKED"
fi
-
+
if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then
error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK"
fi
#define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e
set_nodes_failloc "$(osts_nodes)" 0x20e
-
+
# Should simulate ENOMEM error which is recoverable and should be handled by timeout
multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c &
PID=$!
sleep 5
set_nodes_failloc "$(osts_nodes)" 0
-
+
wait $PID
RC=$?
if [[ $RC -ne 0 ]]; then
if [[ $LOCKED -ne 0 ]]; then
error "Locked pages remain in cache, locked=$LOCKED"
fi
-
+
if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then
error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK"
fi
if [[ $LOCKED -ne 0 ]]; then
error "Locked pages remain in cache, locked=$LOCKED"
fi
-
+
# in recoverable error on OST we want resend and stay until it finished
if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then
error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK"
test_123b () { # statahead(bug 15027)
mkdir -p $DIR/$tdir
createmany -o $DIR/$tdir/$tfile-%d 1000
-
+
cancel_lru_locks mdc
cancel_lru_locks osc
echo "got $COUNT $NAME"
[ ! $MIN ] && error "Missing min value for $NAME proc entry"
eval $NAME=$COUNT || error "Wrong proc format"
-
+
case $NAME in
read_bytes|write_bytes)
[ $MIN -lt 4096 ] && error "min is too small: $MIN"
cd $DIR/$tdir || error "Changing to $DIR/$tdir"
cp /usr/bin/stat . || error "Copying stat to $DIR/$tdir"
- # VFS limits max symlink depth to 5(4KSTACK) or 8
+ # VFS limits max symlink depth to 5(4KSTACK) or 7(8KSTACK) or 8
local i=0
while i=`expr $i + 1`; do
mkdir -p $i || error "Creating dir $i"
done
i=`expr $i - 1`
echo "The symlink depth = $i"
- [ $i -eq 4 -o $i -eq 8 ] || error "Invalid symlink depth"
+ [ $i -eq 5 -o $i -eq 7 -o $i -eq 8 ] || error "Invalid symlink depth"
}
run_test 140 "Check reasonable stack depth (shouldn't LBUG) ===="
cp $TF $DIR/$tfile
sync || error "sync failed"
lctl set_param fail_loc=0
-
+
# discard client's cache
cancel_lru_locks osc
LNETOPTS=$(awk '/^options lnet/ { print $0}' $MODPROBECONF | sed 's/^options lnet //g')
echo $LNETOPTS | grep -q "accept=all" || LNETOPTS="$LNETOPTS accept=all";
# bug 19380
- if [ "$NETTYPE" = "tcp" -o "$NETTYPE" = "o2ib" -o "$NETTYPE" = "ptl" ]; then
- echo $LNETOPTS | grep -q "local_nid_dist_zero=0" ||
- LNETOPTS="$LNETOPTS local_nid_dist_zero=0"
- fi
+ # disable it for now since it only hides the stack overflow upon test w/
+ # local servers
+# if [ "$NETTYPE" = "tcp" -o "$NETTYPE" = "o2ib" -o "$NETTYPE" = "ptl" ]; then
+# echo $LNETOPTS | grep -q "local_nid_dist_zero=0" ||
+# LNETOPTS="$LNETOPTS local_nid_dist_zero=0"
+# fi
echo "lnet options: '$LNETOPTS'"
# note that insmod will ignore anything in modprobe.conf
load_module ../lnet/lnet/lnet $LNETOPTS
done
}
-# client could mount several lustre
+# client could mount several lustre
# Query the quota_type parameter of a filesystem's MDT and OST targets
# with `lctl get_param`, dispatched through do_facet / do_nodes.
# Arguments: $1 - filesystem name; falls back to $FSNAME when unset or empty
# Returns:   0 when both queries succeed; otherwise the exit status of the
#            most recent query that failed (a failing OST query overrides
#            an earlier MDT failure)
quota_type () {
local fsname=${1:-$FSNAME}
local rc=0
# MDT side: issued via do_facet for facet "mgs"; a failure is recorded in rc
# but does not stop the OST query below
do_facet mgs lctl get_param mds.${fsname}-MDT*.quota_type || rc=$?
# OST side: issued via do_nodes on the comma_list of whatever osts_nodes prints
do_nodes $(comma_list $(osts_nodes)) \
lctl get_param obdfilter.${fsname}-OST*.quota_type || rc=$?
- return $rc
+ return $rc
}
restore_quota_type () {
local client=$1
local mnt=$2
local force
- local busy
+ local busy
local need_kill
[ "$3" ] && force=-f
echo \\\$(hostname) env are INSANE!;
exit 1;
fi"
- [ $? -eq 0 ] || rc=1
+ [ $? -eq 0 ] || rc=1
done
return $rc
}
ping -w 3 -c 1 $host > /dev/null 2>&1 || return 0
echo "waiting for $host to fail attempts=$attempts"
[ $i -lt $attempts ] || \
- { echo "$host still pingable after power down! attempts=$attempts" && return 1; }
+ { echo "$host still pingable after power down! attempts=$attempts" && return 1; }
done
}
local attempts=3
if [ "$FAILURE_MODE" = HARD ]; then
- shudown_node_hard $client
+ shudown_node_hard $client
else
zconf_umount_clients $client $mnt -f
fi
done
}
-# only for remote client
+# only for remote client
check_client_load () {
local client=$1
local var=$(client_var_name $client)_load
local TESTLOAD=run_${!var}.sh
ps auxww | grep -v grep | grep $client | grep -q "$TESTLOAD" || return 1
-
+
# bug 18914: try to connect several times not only when
# check ps, but while check_catastrophe also
local tries=3
if [ "$rc" != 0 -a "$expectedfail" ]; then
start_client_load $client
echo "Restarted client load: on $client. Checking ..."
- check_client_load $client
+ check_client_load $client
rc=${PIPESTATUS[0]}
if [ "$rc" != 0 ]; then
log "Client load failed to restart on node $client, rc=$rc"
# failure one client load means test fail
- # we do not need to check other
+ # we do not need to check other
return $rc
fi
else
# as we are in process of changing obd_timeout in different ways
# let's set MAX longer than that
local MAX=${2:-$(( TIMEOUT * 4 ))}
-
+
local var_svc=${facet}_svc
local procfile="*.${!var_svc}.recovery_status"
local WAIT=0
}
init_facets_vars () {
- remote_mds_nodsh ||
+ remote_mds_nodsh ||
init_facet_vars mds $MDSDEV $MDS_MOUNT_OPTS
remote_ost_nodsh && return
export CLIVER=$(lctl get_param version | cut -d. -f 1,2)
fi
- remote_mds_nodsh ||
+ remote_mds_nodsh ||
TIMEOUT=$(do_facet mds "lctl get_param -n timeout")
log "Using TIMEOUT=$TIMEOUT"
check_config () {
local mntpt=$1
- local myMGS_host=$mgs_HOST
+ local myMGS_host=$mgs_HOST
if [ "$NETTYPE" = "ptl" ]; then
- myMGS_host=$(h2ptl $mgs_HOST | sed -e s/@ptl//)
+ myMGS_host=$(h2ptl $mgs_HOST | sed -e s/@ptl//)
fi
echo Checking config lustre mounted on $mntpt
for item in ${excluded//,/ }; do
list=$(echo " $list " | sed -re "s/\s+$item\s+/ /g")
done
- echo $(comma_list $list)
+ echo $(comma_list $list)
}
# list, expand are the comma separated lists
}
# Print a one-line verdict ("FAIL " or "PASS ") followed by the caller's text.
# Globals:   TEST_FAILED (read) - expanded and executed as a command; exit
#            status 0 selects "FAIL ", non-zero selects "PASS "
#            (presumably holds "true" or "false" -- confirm where it is set)
# Arguments: $@ - words echoed after the verdict; unquoted, so they are
#            re-split and runs of whitespace collapse to single spaces
# Outputs:   verdict and arguments on stdout, on a single line
pass() {
- $TEST_FAILED && echo -n "FAIL " || echo -n "PASS "
+ $TEST_FAILED && echo -n "FAIL " || echo -n "PASS "
echo $@
}
rnodes=${rnodes//,/ }
local -a nodes=($rnodes)
- local num=${#nodes[@]}
+ local num=${#nodes[@]}
local i=$((RANDOM * num * 2 / 65536))
echo ${nodes[i]}
################################################################################
# Print the Lustre version string reported for a facet.
# Arguments: $1 - facet name handed to do_facet; defaults to "mds"
# Outputs:   for every line of `$LCTL get_param -n version` output that
#            starts with "lustre:", the second whitespace-separated field
# Returns:   exit status of the pipeline (awk's, unless pipefail is set)
get_lustre_version () {
- local node=${1:-"mds"}
+ local node=${1:-"mds"}
do_facet $node $LCTL get_param -n version | awk '/^lustre:/ {print $2}'
}
while [ "${CONN_STATE}" != "${expected}" ]; do
# for disconn we can check after proc entry is removed
[ "x${CONN_STATE}" == "x" -a "${expected}" == "DISCONN" ] && return 0
- # disconnect rpc should be wait not more obd_timeout
+ # disconnect rpc should be wait not more obd_timeout
[ $i -ge $(($TIMEOUT * 3 / 2)) ] && \
error "can't put import for ${ost}(${ost_facet}) into ${expected} state" && return 1
sleep 1