echo -n $size
}
+fs_inode_ksize() {
+ local facet=${1:-$SINGLEMDS}
+ local fstype=$(facet_fstype $facet)
+ local size=0
+ case $fstype in
+ ldiskfs) size=4;; # ~4KB per inode
+ zfs) size=11;; # 10 to 11KB per inode
+ esac
+
+ echo -n $size
+}
+
check_gss_daemon_nodes() {
local list=$1
dname=$2
# commit the device label change to disk
if [[ $devicelabel =~ (:[a-zA-Z]{3}[0-9]{4}) ]]; then
+ echo "Commit the device label on ${!dev}"
do_facet $facet "sync; sync; sync"
+ sleep 5
fi
# only for remote client
check_client_load () {
- local client=$1
- local var=$(node_var_name $client)_load
- local TESTLOAD=run_${!var}.sh
-
- ps auxww | grep -v grep | grep $client | grep -q "$TESTLOAD" || return 1
-
- # bug 18914: try to connect several times not only when
- # check ps, but while check_catastrophe also
- local tries=3
- local RC=254
- while [ $RC = 254 -a $tries -gt 0 ]; do
- let tries=$tries-1
- # assume success
- RC=0
- if ! check_catastrophe $client; then
- RC=${PIPESTATUS[0]}
- if [ $RC -eq 254 ]; then
- # FIXME: not sure how long we shuold sleep here
- sleep 10
- continue
- fi
- echo "check catastrophe failed: RC=$RC "
- return $RC
- fi
- done
- # We can continue try to connect if RC=254
- # Just print the warning about this
- if [ $RC = 254 ]; then
- echo "got a return status of $RC from do_node while checking catastrophe on $client"
- fi
-
- # see if the load is still on the client
- tries=3
- RC=254
- while [ $RC = 254 -a $tries -gt 0 ]; do
- let tries=$tries-1
- # assume success
- RC=0
- if ! do_node $client "ps auxwww | grep -v grep | grep -q $TESTLOAD"; then
- RC=${PIPESTATUS[0]}
- sleep 30
- fi
- done
- if [ $RC = 254 ]; then
- echo "got a return status of $RC from do_node while checking (catastrophe and 'ps') the client load on $client"
- # see if we can diagnose a bit why this is
- fi
+ local client=$1
+ local var=$(node_var_name $client)_load
+ local testload=run_${!var}.sh
+
+ ps -C $testload | grep $client || return 1
+
+	# bug 18914: try to connect several times, not only when
+	# checking ps but also while running check_node_health
+
+ local tries=3
+ local RC=254
+ while [ $RC = 254 -a $tries -gt 0 ]; do
+ let tries=$tries-1
+ # assume success
+ RC=0
+ if ! check_node_health $client; then
+ RC=${PIPESTATUS[0]}
+ if [ $RC -eq 254 ]; then
+			# FIXME: not sure how long we should sleep here
+ sleep 10
+ continue
+ fi
+ echo "check node health failed: RC=$RC "
+ return $RC
+ fi
+ done
+	# We can keep trying to connect if RC=254
+ # Just print the warning about this
+ if [ $RC = 254 ]; then
+		echo "got a return status of $RC from do_node while checking" \
+ "node health on $client"
+ fi
+
+ # see if the load is still on the client
+ tries=3
+ RC=254
+ while [ $RC = 254 -a $tries -gt 0 ]; do
+ let tries=$tries-1
+ # assume success
+ RC=0
+ if ! do_node $client \
+ "ps auxwww | grep -v grep | grep -q $testload"; then
+ RC=${PIPESTATUS[0]}
+ sleep 30
+ fi
+ done
+ if [ $RC = 254 ]; then
+		echo "got a return status of $RC from do_node while checking" \
+ "(node health and 'ps') the client load on $client"
+ # see if we can diagnose a bit why this is
+ fi
- return $RC
+ return $RC
}
check_client_loads () {
local clients=${1//,/ }
return $rc
}
+lfs_df_check() {
+ local clients=${1:-$CLIENTS}
+
+ if [ -z "$clients" ]; then
+ $LFS df $MOUNT
+ else
+ $PDSH $clients "$LFS df $MOUNT" > /dev/null
+ fi
+}
+
+
clients_up() {
- # not every config has many clients
- sleep 1
- if [ ! -z "$CLIENTS" ]; then
- $PDSH $CLIENTS "stat -f $MOUNT" > /dev/null
- else
- stat -f $MOUNT > /dev/null
- fi
+ # not every config has many clients
+ sleep 1
+ lfs_df_check
}
client_up() {
- local client=$1
- # usually checked on particular client or locally
- sleep 1
- if [ ! -z "$client" ]; then
- $PDSH $client "stat -f $MOUNT" > /dev/null
- else
- stat -f $MOUNT > /dev/null
- fi
+ # usually checked on particular client or locally
+ sleep 1
+ lfs_df_check $1
}
client_evicted() {
true
DEBUGSAVE=""
- [ -n "DEBUGSAVE_SERVER" ] &&
+ [ -n "$DEBUGSAVE_SERVER" ] &&
do_nodes $(comma_list $(all_server_nodes)) \
"$LCTL set_param debug=\\\"${DEBUGSAVE_SERVER}\\\"" ||
true
exit 1
}
+# Throw an error if it's not running in vm - usually for performance
+# verification
+error_not_in_vm() {
+ local virt=$(running_in_vm)
+ if [[ -n "$virt" ]]; then
+ echo "running in VM '$virt', ignore error"
+ error_ignore env=$virt "$@"
+ else
+ error "$@"
+ fi
+}
+
skip_env () {
$FAIL_ON_SKIP_ENV && error false $@ || skip $@
}
cd $SAVE_PWD
reset_fail_loc
check_grant ${testnum} || error "check_grant $testnum failed with $?"
- check_catastrophe || error "LBUG/LASSERT detected"
+ check_node_health
check_dmesg_for_errors || error "Error in dmesg detected"
if [ "$PARALLEL" != "yes" ]; then
ps auxww | grep -v grep | grep -q multiop &&
done
}
-check_catastrophe() {
+check_node_health() {
local nodes=${1:-$(comma_list $(nodes_list))}
- do_nodes $nodes "rc=0;
-val=\\\$($LCTL get_param -n catastrophe 2>&1);
-if [[ \\\$? -eq 0 && \\\$val -ne 0 ]]; then
- echo \\\$(hostname -s): \\\$val;
- rc=\\\$val;
-fi;
-exit \\\$rc"
+ for node in ${nodes//,/ }; do
+ check_network "$node" 5
+ if [ $? -eq 0 ]; then
+ do_node $node "rc=0;
+ val=\\\$($LCTL get_param -n catastrophe 2>&1);
+ if [[ \\\$? -eq 0 && \\\$val -ne 0 ]]; then
+ echo \\\$(hostname -s): \\\$val;
+ rc=\\\$val;
+ fi;
+ exit \\\$rc" || error "$node:LBUG/LASSERT detected"
+ fi
+ done
}
mdsrate_cleanup () {