X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Ftest-framework.sh;h=3d0398f19679e234da3ea0b21a290a9c71302659;hp=9b78323f8bd00fde545dea0b2079571b62320572;hb=af666bef058c5b7997527fc851a84a89375912fb;hpb=cf2e838320f50a1fc320e74d4c051772b8742742 diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 9b78323..3d0398f 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -277,6 +277,8 @@ init_test_env() { [ ! -f "$SGPDDSURVEY" ] && export SGPDDSURVEY=$(which sgpdd-survey) export MCREATE=${MCREATE:-mcreate} export MULTIOP=${MULTIOP:-multiop} + export MMAP_CAT=${MMAP_CAT:-mmap_cat} + export STATX=${STATX:-statx} # Ubuntu, at least, has a truncate command in /usr/bin # so fully path our truncate command. export TRUNCATE=${TRUNCATE:-$LUSTRE/tests/truncate} @@ -291,12 +293,15 @@ init_test_env() { fi export RSYNC_RSH=${RSYNC_RSH:-rsh} + export LNETCTL=${LNETCTL:-"$LUSTRE/../lnet/utils/lnetctl"} + [ ! -f "$LNETCTL" ] && export LNETCTL=$(which lnetctl 2> /dev/null) export LCTL=${LCTL:-"$LUSTRE/utils/lctl"} [ ! -f "$LCTL" ] && export LCTL=$(which lctl) export LFS=${LFS:-"$LUSTRE/utils/lfs"} [ ! -f "$LFS" ] && export LFS=$(which lfs) - SETSTRIPE=${SETSTRIPE:-"$LFS setstripe"} - GETSTRIPE=${GETSTRIPE:-"$LFS getstripe"} + export KSOCKLND_CONFIG=${KSOCKLND_CONFIG:-"$LUSTRE/scripts/ksocklnd-config"} + [ ! -f "$KSOCKLND_CONFIG" ] && + export KSOCKLND_CONFIG=$(which ksocklnd-config 2> /dev/null) export PERM_CMD=${PERM_CMD:-"$LCTL conf_param"} @@ -426,11 +431,21 @@ init_test_env() { # Constants used in more than one test script export LOV_MAX_STRIPE_COUNT=2000 + export DELETE_OLD_POOLS=${DELETE_OLD_POOLS:-false} + export KEEP_POOLS=${KEEP_POOLS:-false} export MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines} . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} get_lustre_env + # use localrecov to enable recovery for local clients, LU-12722 + [[ $MDS1_VERSION -lt $(version_code 2.13.52) ]] || { + export MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o localrecov"} + export MGS_MOUNT_OPTS=${MGS_MOUNT_OPTS:-"-o localrecov"} + } + + [[ $OST1_VERSION -lt $(version_code 2.13.52) ]] || + export OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"-o localrecov"} } check_cpt_number() { @@ -450,10 +465,10 @@ check_cpt_number() { # Return a numeric version code based on a version string. The version # code is useful for comparison two version strings to see which is newer. version_code() { - # split arguments like "1.8.6-wc3" into "1", "8", "6", "wc3" - eval set -- $(tr "[:punct:]" " " <<< $*) + # split arguments like "1.8.6-wc3" into "1", "8", "6", "3" + eval set -- $(tr "[:punct:][a-z]" " " <<< $*) - echo -n "$((($1 << 16) | ($2 << 8) | $3))" + echo -n $(((${1:-0}<<24) | (${2:-0}<<16) | (${3:-0}<<8) | (${4:-0}))) } export LINUX_VERSION=$(uname -r | sed -e "s/\([0-9]*\.[0-9]*\.[0-9]*\).*/\1/") @@ -490,14 +505,31 @@ export LINUX_VERSION_CODE=$(version_code ${LINUX_VERSION//\./ }) # output: prints version string to stdout in (up to 4) dotted-decimal values lustre_build_version() { local facet=${1:-client} - local ver=$(do_facet $facet "$LCTL get_param -n version 2>/dev/null || - $LCTL lustre_build_version 2>/dev/null || - $LCTL --version 2>/dev/null | cut -d' ' -f2") + local facet_version=${facet}_VERSION + + # if the global variable is already set, then use that + [ -n "${!facet_version}" ] && echo ${!facet_version} && return + + # this is the currently-running version of the kernel modules + local ver=$(do_facet $facet "$LCTL get_param -n version 2>/dev/null") + # we mostly test 2.10+ systems, only try others if the above fails + if [ -z "$ver" ]; then + ver=$(do_facet $facet "$LCTL lustre_build_version 2>/dev/null") + fi + if [ -z "$ver" ]; then + ver=$(do_facet $facet "$LCTL --version 2>/dev/null" | + cut -d' ' -f2) + fi local lver=$(egrep -i "lustre: |version: " <<<"$ver" | head -n 1) [ -n "$lver" ] && ver="$lver" - sed -e 's/[^:]*: //' -e 's/^v//' -e 's/[ -].*//' -e 's/_/./g' <<<$ver | - cut -d. -f1-4 + lver=$(sed -e 's/[^:]*: //' -e 's/^v//' -e 's/[ -].*//' <<<$ver | + tr _ . | cut -d. -f1-4) + + # save in global variable for the future + export $facet_version=$lver + + echo $lver } # Report the Lustre numeric build version code for the supplied facet. @@ -509,6 +541,17 @@ module_loaded () { /sbin/lsmod | grep -q "^\<$1\>" } +check_lfs_df_ret_val() { + # Ignore only EOPNOTSUPP (which is 95; Operation not supported) error + # returned by 'lfs df' for valid dentry but not a lustrefs. + # + # 'lfs df' historically always returned success(0) instead of + # EOPNOTSUPP. This function for compatibility reason, ignores and + # masquerades EOPNOTSUPP as success. + [[ $1 -eq 95 ]] && return 0 + return $1 +} + PRLFS=false lustre_insmod() { local module=$1 @@ -653,9 +696,9 @@ load_modules_local() { # that obviously has nothing to do with this Lustre run # Disable automatic memory scanning to avoid perf hit. if [ -f /sys/kernel/debug/kmemleak ] ; then - echo scan=off > /sys/kernel/debug/kmemleak - echo scan > /sys/kernel/debug/kmemleak - echo clear > /sys/kernel/debug/kmemleak + echo scan=off > /sys/kernel/debug/kmemleak || true + echo scan > /sys/kernel/debug/kmemleak || true + echo clear > /sys/kernel/debug/kmemleak || true fi echo Loading modules from $LUSTRE @@ -680,6 +723,7 @@ load_modules_local() { # if there is more than 4 CPU cores, libcfs should create multiple CPU # partitions. So we just force libcfs to create 2 partitions for # system with 2 or 4 cores + local saved_opts="$MODOPTS_LIBCFS" if [ $ncpus -le 4 ] && [ $ncpus -gt 1 ]; then # force to enable multiple CPU partitions echo "Force libcfs to create 2 CPU partitions" @@ -691,7 +735,7 @@ load_modules_local() { load_module ../libcfs/libcfs/libcfs # Prevent local MODOPTS_LIBCFS being passed as part of environment # variable to remote nodes - unset MODOPTS_LIBCFS + MODOPTS_LIBCFS=$saved_opts set_default_debug load_module ../lnet/lnet/lnet @@ -715,8 +759,8 @@ load_modules_local() { load_module fid/fid load_module lmv/lmv load_module osc/osc - load_module mdc/mdc load_module lov/lov + load_module mdc/mdc load_module mgc/mgc load_module obdecho/obdecho if ! client_only; then @@ -786,21 +830,19 @@ load_modules () { } check_mem_leak () { - LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true) - LEAK_PORTALS=$(dmesg | tail -n 20 | grep "Portals memory leaked" || true) - if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then - echo "$LEAK_LUSTRE" 1>&2 - echo "$LEAK_PORTALS" 1>&2 - mv $TMP/debug $TMP/debug-leak.`date +%s` || true - echo "Memory leaks detected" - [ -n "$IGNORE_LEAK" ] && { echo "ignoring leaks" && return 0; } || true - return 1 - fi + LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true) + LEAK_PORTALS=$(dmesg | tail -n 20 | egrep -i "libcfs.*memory leaked" || true) + if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then + echo "$LEAK_LUSTRE" 1>&2 + echo "$LEAK_PORTALS" 1>&2 + mv $TMP/debug $TMP/debug-leak.`date +%s` || true + echo "Memory leaks detected" + [ -n "$IGNORE_LEAK" ] && { echo "ignoring leaks" && return 0; } || true + return 1 + fi } -unload_modules() { - wait_exit_ST client # bug 12845 - +unload_modules_local() { $LUSTRE_RMMOD ldiskfs || return 2 [ -f /etc/udev/rules.d/99-lustre-test.rules ] && @@ -808,15 +850,23 @@ unload_modules() { udevadm control --reload-rules udevadm trigger + check_mem_leak || return 254 + + return 0 +} + +unload_modules() { + local rc=0 + + wait_exit_ST client # bug 12845 + + unload_modules_local || rc=$? + if $LOAD_MODULES_REMOTE; then local list=$(comma_list $(remote_nodes_list)) if [ -n "$list" ]; then echo "unloading modules on: '$list'" - do_rpc_nodes "$list" $LUSTRE_RMMOD ldiskfs - do_rpc_nodes "$list" check_mem_leak - do_rpc_nodes "$list" "rm -f /etc/udev/rules.d/99-lustre-test.rules" - do_rpc_nodes "$list" "udevadm control --reload-rules" - do_rpc_nodes "$list" "udevadm trigger" + do_rpc_nodes "$list" unload_modules_local fi fi @@ -827,10 +877,9 @@ unload_modules() { rm -f $sbin_mount fi - check_mem_leak || return 254 + [[ $rc -eq 0 ]] && echo "modules unloaded." - echo "modules unloaded." - return 0 + return $rc } fs_log_size() { @@ -838,7 +887,7 @@ fs_log_size() { local size=0 case $(facet_fstype $facet) in - ldiskfs) size=50;; # largest seen is 44, leave some headroom + ldiskfs) size=72;; # largest seen is 64, leave some headroom # grant_block_size is in bytes, allow at least 2x max blocksize zfs) size=$(lctl get_param osc.$FSNAME*.import | awk '/grant_block_size:/ {print $2/512; exit;}') @@ -974,6 +1023,22 @@ add_sk_mntflag() { echo -n $mt_opts } +from_build_tree() { + local from_tree + + case $LUSTRE in + /usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \ + /usr/lib64/lustre ) + from_tree=false + ;; + *) + from_tree=true + ;; + esac + + [ $from_tree = true ] +} + init_gss() { if $SHARED_KEY; then GSS=true @@ -984,16 +1049,6 @@ init_gss() { return fi - case $LUSTRE in - /usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \ - /usr/lib64/lustre ) - from_build_tree=false - ;; - *) - from_build_tree=true - ;; - esac - if ! module_loaded ptlrpc_gss; then load_module ptlrpc/gss/ptlrpc_gss module_loaded ptlrpc_gss || @@ -1004,6 +1059,16 @@ init_gss() { start_gss_daemons || error_exit "start gss daemon failed! rc=$?" fi + if $GSS_SK && ! $SK_NO_KEY; then + echo "Loading basic SSK keys on all servers" + do_nodes $(comma_list $(all_server_nodes)) \ + "lgss_sk -t server -l $SK_PATH/$FSNAME.key || true" + do_nodes $(comma_list $(all_server_nodes)) \ + "keyctl show | grep lustre | cut -c1-11 | + sed -e 's/ //g;' | + xargs -IX keyctl setperm X 0x3f3f3f3f" + fi + if $GSS_SK && $SK_NO_KEY; then local numclients=${1:-$CLIENTCOUNT} local clients=${CLIENTS:-$HOSTNAME} @@ -1012,7 +1077,7 @@ init_gss() { SK_NO_KEY=false local lgssc_conf_file="/etc/request-key.d/lgssc.conf" - if $from_build_tree; then + if from_build_tree; then mkdir -p $SK_OM_PATH if grep -q request-key /proc/mounts > /dev/null; then echo "SSK: Request key already mounted." @@ -1031,7 +1096,7 @@ init_gss() { cat $lgssc_conf_file if ! local_mode; then - if $from_build_tree; then + if from_build_tree; then do_nodes $(comma_list $(all_nodes)) "mkdir -p \ $SK_OM_PATH" do_nodes $(comma_list $(all_nodes)) "mount \ @@ -1111,11 +1176,19 @@ init_gss() { OST_MOUNT_OPTS=$(add_sk_mntflag $OST_MOUNT_OPTS) MOUNT_OPTS=$(add_sk_mntflag $MOUNT_OPTS) SEC=$SK_FLAVOR + if [ -z "$LGSS_KEYRING_DEBUG" ]; then + LGSS_KEYRING_DEBUG=4 + fi fi - if [ -n "$LGSS_KEYRING_DEBUG" ]; then + if [ -n "$LGSS_KEYRING_DEBUG" ] && \ + ( local_mode || from_build_tree ); then + lctl set_param -n \ + sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG + elif [ -n "$LGSS_KEYRING_DEBUG" ]; then + do_nodes $(comma_list $(all_nodes)) "modprobe ptlrpc_gss && \ lctl set_param -n \ - sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG + sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG" fi } @@ -1128,16 +1201,6 @@ cleanup_gss() { cleanup_sk() { if $GSS_SK; then - case $LUSTRE in - /usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \ - /usr/lib64/lustre ) - from_build_tree=false - ;; - *) - from_build_tree=true - ;; - esac - if $SK_S2S; then do_node $(mgs_node) "$LCTL nodemap_del $SK_S2SNM" do_node $(mgs_node) "$LCTL nodemap_del $SK_S2SNMCLI" @@ -1150,7 +1213,7 @@ cleanup_sk() { $SK_PATH/$FSNAME*.key $SK_PATH/nodemap/$FSNAME*.key" do_nodes $(comma_list $(all_nodes)) "keyctl show | \ awk '/lustre/ { print \\\$1 }' | xargs -IX keyctl unlink X" - if $from_build_tree; then + if from_build_tree; then # Remove the mount and clean up the files we added to # SK_PATH do_nodes $(comma_list $(all_nodes)) "while grep -q \ @@ -1274,20 +1337,6 @@ devicelabel() { echo -n $label } -mdsdevlabel() { - local num=$1 - local device=$(mdsdevname $num) - local label=$(devicelabel mds$num ${device} | grep -v "CMD: ") - echo -n $label -} - -ostdevlabel() { - local num=$1 - local device=$(ostdevname $num) - local label=$(devicelabel ost$num ${device} | grep -v "CMD: ") - echo -n $label -} - # # Get the device of a facet. # @@ -1520,8 +1569,7 @@ get_osd_param() { local device=${2:-$FSNAME-OST*} local name=$3 - do_nodes $nodes "$LCTL get_param -n obdfilter.$device.$name \ - osd-*.$device.$name 2>&1" | grep -v 'error:' + do_nodes $nodes "$LCTL get_param -n osd-*.$device.$name" } set_osd_param() { @@ -1530,8 +1578,7 @@ set_osd_param() { local name=$3 local value=$4 - do_nodes $nodes "$LCTL set_param -n obdfilter.$device.$name=$value \ - osd-*.$device.$name=$value 2>&1" | grep -v 'error:' + do_nodes $nodes "$LCTL set_param -n osd-*.$device.$name=$value" } set_debug_size () { @@ -1552,34 +1599,58 @@ set_debug_size () { } set_default_debug () { - local debug=${1:-"$PTLDEBUG"} - local subsys=${2:-"$SUBSYSTEM"} - local debug_size=${3:-$DEBUG_SIZE} + local debug=${1:-"$PTLDEBUG"} + local subsys=${2:-"$SUBSYSTEM"} + local debug_size=${3:-$DEBUG_SIZE} - [ -n "$debug" ] && lctl set_param debug="$debug" >/dev/null - [ -n "$subsys" ] && lctl set_param subsystem_debug="${subsys# }" >/dev/null + [ -n "$debug" ] && lctl set_param debug="$debug" >/dev/null + [ -n "$subsys" ] && lctl set_param subsystem_debug="${subsys# }" >/dev/null - [ -n "$debug_size" ] && set_debug_size $debug_size > /dev/null + [ -n "$debug_size" ] && set_debug_size $debug_size > /dev/null } set_default_debug_nodes () { local nodes="$1" + local debug="${2:-"$PTLDEBUG"}" + local subsys="${3:-"$SUBSYSTEM"}" + local debug_size="${4:-$DEBUG_SIZE}" if [[ ,$nodes, = *,$HOSTNAME,* ]]; then nodes=$(exclude_items_from_list "$nodes" "$HOSTNAME") set_default_debug fi - do_rpc_nodes "$nodes" set_default_debug \ - \\\"$PTLDEBUG\\\" \\\"$SUBSYSTEM\\\" $DEBUG_SIZE || true + [[ -z "$nodes" ]] || + do_rpc_nodes "$nodes" set_default_debug \ + \\\"$debug\\\" \\\"$subsys\\\" $debug_size || true } set_default_debug_facet () { - local facet=$1 - local node=$(facet_active_host $facet) - [ -z "$node" ] && echo "No host defined for facet $facet" && exit 1 + local facet=$1 + local debug="${2:-"$PTLDEBUG"}" + local subsys="${3:-"$SUBSYSTEM"}" + local debug_size="${4:-$DEBUG_SIZE}" + local node=$(facet_active_host $facet) + + [ -n "$node" ] || error "No host defined for facet $facet" + + set_default_debug_nodes $node "$debug" "$subsys" $debug_size +} - set_default_debug_nodes $node +set_params_nodes () { + [[ $# -ge 2 ]] || return 0 + + local nodes=$1 + shift + do_nodes $nodes $LCTL set_param $@ +} + +set_params_clients () { + local clients=${1:-$CLIENTS} + local params=${2:-$CLIENT_LCTL_SETPARAM_PARAM} + + [[ -n $params ]] || return 0 + set_params_nodes $clients $params } set_hostid () { @@ -2135,7 +2206,7 @@ restore_quota() { if [ "$old_MDT_QUOTA_TYPE" ]; then if [[ $PERM_CMD == *"set_param -P"* ]]; then do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-MDT*.quota_slave.enable = \ + osd-*.$FSNAME-MDT*.quota_slave.enabled = \ $old_MDT_QUOTA_TYPE else do_facet mgs $PERM_CMD \ @@ -2145,7 +2216,7 @@ restore_quota() { if [ "$old_OST_QUOTA_TYPE" ]; then if [[ $PERM_CMD == *"set_param -P"* ]]; then do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-OST*.quota_slave.enable = \ + osd-*.$FSNAME-OST*.quota_slave.enabled = \ $old_OST_QUOTA_TYPE else do_facet mgs $LCTL conf_param \ @@ -2159,6 +2230,7 @@ restore_quota() { # This will allow fixing the "lfs df" summary line in the future. lfs_df() { $LFS df $* | sed -e 's/filesystem /filesystem_/' + check_lfs_df_ret_val $? } # Get free inodes on the MDT specified by mdt index, free indoes on @@ -2184,10 +2256,11 @@ mdt_free_inodes() { ost_dev_status() { local ost_idx=$1 local mnt_pnt=${2:-$MOUNT} + local opts=$3 local ost_uuid ost_uuid=$(ostuuid_from_index $ost_idx $mnt_pnt) - lfs_df $mnt_pnt | awk '/'$ost_uuid'/ { print $7 }' + lfs_df $opts $mnt_pnt | awk '/'$ost_uuid'/ { print $7 }' } setup_quota(){ @@ -2205,9 +2278,9 @@ setup_quota(){ if [[ $PERM_CMD == *"set_param -P"* ]]; then do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-MDT*.quota_slave.enable=$QUOTA_TYPE + osd-*.$FSNAME-MDT*.quota_slave.enabled=$QUOTA_TYPE do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-OST*.quota_slave.enable=$QUOTA_TYPE + osd-*.$FSNAME-OST*.quota_slave.enabled=$QUOTA_TYPE else do_facet mgs $PERM_CMD $FSNAME.quota.mdt=$QUOTA_TYPE || error "set mdt quota type failed" @@ -2259,6 +2332,11 @@ zconf_mount() { exit 1 fi + if $GSS_SK; then + # update mount option with skpath + opts=$(add_sk_mntflag $opts) + fi + echo "Starting client: $client: $flags $opts $device $mnt" do_node $client mkdir -p $mnt if [ -n "$FILESET" -a -z "$SKIP_FILESET" ];then @@ -2288,6 +2366,7 @@ zconf_mount() { fi set_default_debug_nodes $client + set_params_clients $client return 0 } @@ -2320,17 +2399,19 @@ zconf_umount() { fi } -# Mount the file system on the MGS -mount_mgs_client() { - do_facet mgs "mkdir -p $MOUNT" - zconf_mount $mgs_HOST $MOUNT $MOUNT_OPTS || - error "unable to mount $MOUNT on MGS" +# Mount the file system on the MDS +mount_mds_client() { + local mds_HOST=${SINGLEMDS}_HOST + echo $mds_HOST + zconf_mount $mds1_HOST $MOUNT2 $MOUNT_OPTS || + error "unable to mount $MOUNT2 on MDS" } -# Unmount the file system on the MGS -umount_mgs_client() { - zconf_umount $mgs_HOST $MOUNT - do_facet mgs "rm -rf $MOUNT" +# Unmount the file system on the MDS +umount_mds_client() { + local mds_HOST=${SINGLEMDS}_HOST + zconf_umount $mds1_HOST $MOUNT2 + do_facet $SINGLEMDS "rmdir $MOUNT2" } # nodes is comma list @@ -2512,6 +2593,7 @@ exit \\\$rc" || return ${PIPESTATUS[0]} do_nodes $clients "mount | grep $mnt' '" set_default_debug_nodes $clients + set_params_clients $clients return 0 } @@ -2645,19 +2727,27 @@ remount_facet() { reboot_facet() { local facet=$1 + local node=$(facet_active_host $facet) + local sleep_time=${2:-10} + if [ "$FAILURE_MODE" = HARD ]; then - reboot_node $(facet_active_host $facet) + boot_node $node else - sleep 10 + sleep $sleep_time fi } boot_node() { - local node=$1 - if [ "$FAILURE_MODE" = HARD ]; then - reboot_node $node - wait_for_host $node - fi + local node=$1 + + if [ "$FAILURE_MODE" = HARD ]; then + reboot_node $node + wait_for_host $node + if $LOAD_MODULES_REMOTE; then + echo "loading modules on $node: $facet" + do_rpc_nodes $node load_modules_local + fi + fi } facets_hosts () { @@ -2737,16 +2827,15 @@ start_client_load() { } start_client_loads () { - local -a clients=(${1//,/ }) - local numloads=${#CLIENT_LOADS[@]} - local testnum + local -a clients=(${1//,/ }) + local numloads=${#CLIENT_LOADS[@]} - for ((nodenum=0; nodenum < ${#clients[@]}; nodenum++ )); do - testnum=$((nodenum % numloads)) - start_client_load ${clients[nodenum]} ${CLIENT_LOADS[testnum]} - done - # bug 22169: wait the background threads to start - sleep 2 + for ((nodenum=0; nodenum < ${#clients[@]}; nodenum++ )); do + local load=$((nodenum % numloads)) + start_client_load ${clients[nodenum]} ${CLIENT_LOADS[load]} + done + # bug 22169: wait the background threads to start + sleep 2 } # only for remote client @@ -2912,83 +3001,118 @@ stop_client_loads() { } # End recovery-scale functions -# verify that lustre actually cleaned up properly -cleanup_check() { - VAR=$(lctl get_param -n catastrophe 2>&1) - if [ $? = 0 ] ; then - if [ $VAR != 0 ]; then - error "LBUG/LASSERT detected" - fi - fi - BUSY=$(dmesg | grep -i destruct || true) - if [ -n "$BUSY" ]; then - echo "$BUSY" 1>&2 - [ -e $TMP/debug ] && mv $TMP/debug $TMP/debug-busy.$(date +%s) - exit 205 - fi - - check_mem_leak || exit 204 - - [[ $($LCTL dl 2>/dev/null | wc -l) -gt 0 ]] && $LCTL dl && - echo "$TESTSUITE: lustre didn't clean up..." 1>&2 && - return 202 || true - - if module_loaded lnet || module_loaded libcfs; then - echo "$TESTSUITE: modules still loaded..." 1>&2 - /sbin/lsmod 1>&2 - return 203 - fi - return 0 -} +## +# wait for a command to return the expected result +# +# This will run @check on @node repeatedly until the output matches @expect +# based on the supplied condition, or until @max_wait seconds have elapsed, +# whichever comes first. @cond may be one of the normal bash operators, +# "-gt", "-ge", "-eq", "-le", "-lt", "==", "!=", or "=~", and must be quoted +# in the caller to avoid unintentional evaluation by the shell in the caller. +# +# If @max_wait is not specified, the condition will be checked for up to 90s. +# +# If --verbose is passed as the first argument, the result is printed on each +# value change, otherwise it is only printed after every 10s interval. +# +# If --quiet is passed as the first/second argument, the do_node() command +# will not print the remote command before executing it each time. +# +# Using wait_update_cond() or related helper function is preferable to adding +# a "long enough" wait for some state to change in the background, since +# "long enough" may be too short due to tunables, system config, or running in +# a VM, and must by necessity wait too long for most cases or risk failure. +# +# usage: wait_update_cond [--verbose] [--quiet] node check cond expect [max_wait] +wait_update_cond() { + local verbose + local quiet -wait_update () { - local verbose=false - if [[ "$1" == "--verbose" ]]; then - shift - verbose=true - fi + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift local node=$1 - local TEST=$2 - local FINAL=$3 - local MAX=${4:-90} - local RESULT - local PREV_RESULT - local WAIT=0 + local check="$2" + local cond="$3" + local expect="$4" + local max_wait=${5:-90} + local result + local prev_result + local waited=0 + local begin=$SECONDS local sleep=1 local print=10 - PREV_RESULT=$(do_node $node "$TEST") - while [ true ]; do - RESULT=$(do_node $node "$TEST") - if [[ "$RESULT" == "$FINAL" ]]; then - [[ -z "$RESULT" || $WAIT -le $sleep ]] || - echo "Updated after ${WAIT}s: wanted '$FINAL'"\ - "got '$RESULT'" + while (( $waited <= $max_wait )); do + result=$(do_node $quiet $node "$check") + + eval [[ "'$result'" $cond "'$expect'" ]] + if [[ $? == 0 ]]; then + [[ -z "$result" || $waited -le $sleep ]] || + echo "Updated after ${waited}s: want '$expect' got '$result'" return 0 fi - if [[ $verbose && "$RESULT" != "$PREV_RESULT" ]]; then - echo "Changed after ${WAIT}s: from '$PREV_RESULT'"\ - "to '$RESULT'" - PREV_RESULT=$RESULT + if [[ -n "$verbose" && "$result" != "$prev_result" ]]; then + [[ -n "$prev_result" ]] && + echo "Changed after ${waited}s: from '$prev_result' to '$result'" + prev_result="$result" fi - [[ $WAIT -ge $MAX ]] && break - [[ $((WAIT % print)) -eq 0 ]] && - echo "Waiting $((MAX - WAIT)) secs for update" - WAIT=$((WAIT + sleep)) + (( $waited % $print == 0 )) && + echo "Waiting $((max_wait - waited))s for '$expect'" sleep $sleep + waited=$((SECONDS - begin)) done - echo "Update not seen after ${MAX}s: wanted '$FINAL' got '$RESULT'" + echo "Update not seen after ${max_wait}s: want '$expect' got '$result'" return 3 } +# usage: wait_update [--verbose] [--quiet] node check expect [max_wait] +wait_update() { + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + + local node="$1" + local check="$2" + local expect="$3" + local max_wait=$4 + + wait_update_cond $verbose $quiet $node "$check" "==" "$expect" $max_wait +} + +# usage: wait_update_facet_cond [--verbose] facet check cond expect [max_wait] +wait_update_facet_cond() { + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + + local node=$(facet_active_host $1) + local check="$2" + local cond="$3" + local expect="$4" + local max_wait=$5 + + wait_update_cond $verbose $quiet $node "$check" "$cond" "$expect" $max_wait +} + +# usage: wait_update_facet [--verbose] facet check expect [max_wait] wait_update_facet() { - local verbose= - [ "$1" = "--verbose" ] && verbose="$1" && shift + local verbose + local quiet - local facet=$1 - shift - wait_update $verbose $(facet_active_host $facet) "$@" + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + + local node=$(facet_active_host $1) + local check="$2" + local expect="$3" + local max_wait=$4 + + wait_update_cond $verbose $quiet $node "$check" "==" "$expect" $max_wait } sync_all_data() { @@ -3200,13 +3324,13 @@ wait_delete_completed_mds() { } wait_for_host() { - local hostlist=$1 + local hostlist=$1 - # we can use "for" here because we are waiting the slowest - for host in ${hostlist//,/ }; do - check_network "$host" 900 - done - while ! do_nodes $hostlist hostname > /dev/null; do sleep 5; done + # we can use "for" here because we are waiting the slowest + for host in ${hostlist//,/ }; do + check_network "$host" 900 + done + while ! do_nodes $hostlist hostname > /dev/null; do sleep 5; done } wait_for_facet() { @@ -3324,14 +3448,15 @@ wait_osts_up() { } wait_destroy_complete () { - echo "Waiting for local destroys to complete" + echo "Waiting for MDT destroys to complete" # MAX value shouldn't be big as this mean server responsiveness # never increase this just to make test pass but investigate # why it takes so long time - local MAX=5 + local MAX=${1:-5} local WAIT=0 + local list=$(comma_list $(mdts_nodes)) while [ $WAIT -lt $MAX ]; do - local -a RPCs=($($LCTL get_param -n osc.*.destroys_in_flight)) + local -a RPCs=($(do_nodes $list $LCTL get_param -n osp.*.destroys_in_flight)) local con=1 local i @@ -3346,7 +3471,7 @@ wait_destroy_complete () { echo "Waiting ${WAIT}s for local destroys to complete" WAIT=$((WAIT + 1)) done - echo "Local destroys weren't done in $MAX sec." + echo "MDT destroys weren't done in $MAX sec." return 1 } @@ -3411,12 +3536,20 @@ wait_remote_prog () { lfs_df_check() { local clients=${1:-$CLIENTS} + local rc if [ -z "$clients" ]; then - $LFS df $MOUNT + $LFS df $MOUNT > /dev/null + rc=$? else $PDSH $clients "$LFS df $MOUNT" > /dev/null + rc=$? fi + + check_lfs_df_ret_val $rc + rc=$? + + return $rc } clients_up() { @@ -3425,6 +3558,19 @@ clients_up() { lfs_df_check } +all_mds_up() { + (( MDSCOUNT == 1 )) && return + + # wait so that statfs data on MDT expire + local delay=$(do_facet $SINGLEMDS lctl \ + get_param -n osp.*MDT0000*MDT0001.maxage) + sleep $delay + local nodes=$(comma_list $(mdts_nodes)) + # initiate statfs RPC, all to all MDTs + do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null + do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null +} + client_up() { # usually checked on particular client or locally sleep 1 @@ -3517,7 +3663,7 @@ facet_failover() { facet=$(echo ${affecteds[index]} | tr -s " " | cut -d"," -f 1) echo reboot facets: ${affecteds[index]} - reboot_facet $facet + reboot_facet $facet $sleep_time change_active ${affecteds[index]} @@ -3544,10 +3690,6 @@ facet_failover() { done } -obd_name() { - local facet=$1 -} - replay_barrier() { local facet=$1 do_facet $facet "sync; sync; sync" @@ -3647,25 +3789,29 @@ fail() { } fail_nodf() { - local facet=$1 - facet_failover $facet + local facet=$1 + + facet_failover $facet } fail_abort() { local facet=$1 + local abort_type=${2:-"abort_recovery"} + stop $facet change_active $facet wait_for_facet $facet - mount_facet $facet -o abort_recovery + mount_facet $facet -o $abort_type clients_up || echo "first stat failed: $?" clients_up || error "post-failover stat: $?" + all_mds_up } host_nids_address() { local nodes=$1 local net=${2:-"."} - do_nodes $nodes "$LCTL list_nids | grep $net | cut -f 1 -d @" + do_nodes $nodes "$LCTL list_nids | grep -w $net | cut -f 1 -d @" } h2name_or_ip() { @@ -3915,26 +4061,27 @@ change_active() { } do_node() { - local verbose=false - # do not stripe off hostname if verbose, bug 19215 - if [ x$1 = x--verbose ]; then - shift - verbose=true - fi + local verbose + local quiet - local HOST=$1 - shift - local myPDSH=$PDSH - if [ "$HOST" = "$HOSTNAME" ]; then - myPDSH="no_dsh" - elif [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" ]; then - echo "cannot run remote command on $HOST with $myPDSH" - return 128 - fi - if $VERBOSE; then - echo "CMD: $HOST $@" >&2 - $myPDSH $HOST "$LCTL mark \"$@\"" > /dev/null 2>&1 || : - fi + # do not strip off hostname if verbose, b=19215 + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + + local HOST=$1 + shift + local myPDSH=$PDSH + + if [ "$HOST" = "$HOSTNAME" ]; then + myPDSH="no_dsh" + elif [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" ]; then + echo "cannot run remote command on $HOST with $myPDSH" + return 128 + fi + if $VERBOSE && [[ -z "$quiet" ]]; then + echo "CMD: $HOST $@" >&2 + $myPDSH $HOST "$LCTL mark \"$@\"" > /dev/null 2>&1 || : + fi if [[ "$myPDSH" == "rsh" ]] || [[ "$myPDSH" == *pdsh* && "$myPDSH" != *-S* ]]; then @@ -3951,7 +4098,7 @@ do_node() { return 0 fi - if $verbose ; then + if [[ -n "$verbose" ]]; then # print HOSTNAME for myPDSH="no_dsh" if [[ $myPDSH = no_dsh ]]; then $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" | sed -e "s/^/${HOSTNAME}: /" @@ -3964,12 +4111,8 @@ do_node() { return ${PIPESTATUS[0]} } -do_nodev() { - do_node --verbose "$@" -} - single_local_node () { - [ "$1" = "$HOSTNAME" ] + [ "$1" = "$HOSTNAME" ] } # Outputs environment variable assignments that should be passed to remote nodes @@ -4015,45 +4158,42 @@ get_env_vars() { } do_nodes() { - local verbose=false - # do not stripe off hostname if verbose, bug 19215 - if [ x$1 = x--verbose ]; then - shift - verbose=true - fi + local verbose + local quiet - local rnodes=$1 - shift + # do not strip off hostname if verbose, b=19215 + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift - if single_local_node $rnodes; then - if $verbose; then - do_nodev $rnodes "$@" - else - do_node $rnodes "$@" - fi - return $? - fi + local rnodes=$1 + shift - # This is part from do_node - local myPDSH=$PDSH + if single_local_node $rnodes; then + do_node $verbose $quiet $rnodes "$@" + return $? + fi - [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" -o "$myPDSH" = "rsh" ] && \ - echo "cannot run remote command on $rnodes with $myPDSH" && return 128 + # This is part from do_node + local myPDSH=$PDSH - export FANOUT=$(get_node_count "${rnodes//,/ }") - if $VERBOSE; then - echo "CMD: $rnodes $@" >&2 - $myPDSH $rnodes "$LCTL mark \"$@\"" > /dev/null 2>&1 || : - fi + [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" -o "$myPDSH" = "rsh" ] && + echo "cannot run remote command on $rnodes with $myPDSH" && + return 128 - # do not replace anything from pdsh output if -N is used - # -N Disable hostname: prefix on lines of output. - if $verbose || [[ $myPDSH = *-N* ]]; then - $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")" - else - $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")" | sed -re "s/^[^:]*: //g" - fi - return ${PIPESTATUS[0]} + export FANOUT=$(get_node_count "${rnodes//,/ }") + if $VERBOSE && [[ -z "$quiet" ]]; then + echo "CMD: $rnodes $@" >&2 + $myPDSH $rnodes "$LCTL mark \"$@\"" > /dev/null 2>&1 || : + fi + + # do not replace anything from pdsh output if -N is used + # -N Disable hostname: prefix on lines of output. + if [[ -n "$verbose" || $myPDSH = *-N* ]]; then + $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")" + else + $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")" | sed -re "s/^[^:]*: //g" + fi + return ${PIPESTATUS[0]} } ## @@ -4064,11 +4204,18 @@ do_nodes() { # # usage: do_facet $facet command [arg ...] do_facet() { + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + local facet=$1 shift - local HOST=$(facet_active_host $facet) - [ -z $HOST ] && echo "No host defined for facet ${facet}" && exit 1 - do_node $HOST "$@" + local host=$(facet_active_host $facet) + + [ -z "$host" ] && echo "No host defined for facet ${facet}" && exit 1 + do_node $verbose $quiet $host "$@" } # Function: do_facet_random_file $FACET $FILE $SIZE @@ -4091,7 +4238,7 @@ do_facet_create_file() { } do_nodesv() { - do_nodes --verbose "$@" + do_nodes --verbose "$@" } add() { @@ -4101,6 +4248,12 @@ add() { stop ${facet} -f rm -f $TMP/${facet}active [[ $facet = mds1 ]] && combined_mgs_mds && rm -f $TMP/mgsactive + + # make sure in-tree ldiskfs is loaded before mkfs + if local_mode && [[ $(node_fstypes $HOSTNAME) == *ldiskfs* ]]; then + load_module ../ldiskfs/ldiskfs + fi + do_facet ${facet} $MKFS $* || return ${PIPESTATUS[0]} if [[ $(facet_fstype $facet) == zfs ]]; then @@ -4558,6 +4711,8 @@ mkfs_opts() { var=${type}_FS_MKFS_OPTS fs_mkfs_opts+=${!var:+" ${!var}"} + [[ "$QUOTA_TYPE" =~ "p" ]] && fs_mkfs_opts+=" -O project" + [ $fstype == ldiskfs ] && fs_mkfs_opts=$(squash_opt $fs_mkfs_opts) if [ -n "${fs_mkfs_opts## }" ]; then @@ -5167,6 +5322,11 @@ init_param_vars () { TIMEOUT=$(do_facet $SINGLEMDS "lctl get_param -n timeout") log "Using TIMEOUT=$TIMEOUT" + # tune down to speed up testing on (usually) small setups + local mgc_timeout=/sys/module/mgc/parameters/mgc_requeue_timeout_min + do_nodes $(comma_list $(nodes_list)) \ + "[ -f $mgc_timeout ] && echo 1 > $mgc_timeout; exit 0" + osc_ensure_active $SINGLEMDS $TIMEOUT osc_ensure_active client $TIMEOUT $LCTL set_param osc.*.idle_timeout=debug @@ -5195,6 +5355,11 @@ init_param_vars () { # $LFS quotaoff -ug $MOUNT > /dev/null 2>&1 fi fi + + (( MDS1_VERSION <= $(version_code 2.13.52) )) || + do_nodes $(comma_list $(mdts_nodes)) \ + "$LCTL set_param lod.*.mdt_hash=crush" + do_node $(mgs_node) "$LCTL set_param -P *.*.lbug_on_grant_miscount=1" return 0 } @@ -5275,32 +5440,110 @@ check_timeout () { } is_mounted () { - local mntpt=$1 - [ -z $mntpt ] && return 1 - local mounted=$(mounted_lustre_filesystems) + local mntpt=$1 + [ -z $mntpt ] && return 1 + local mounted=$(mounted_lustre_filesystems) - echo $mounted' ' | grep -w -q $mntpt' ' + echo $mounted' ' | grep -w -q $mntpt' ' } -is_empty_dir() { - [ $(find $1 -maxdepth 1 -print | wc -l) = 1 ] && return 0 - return 1 +create_pools () { + local pool=$1 + local ostsn=${2:-$OSTCOUNT} + local npools=${FS_NPOOLS:-$((OSTCOUNT / ostsn))} + local n + + echo ostsn=$ostsn npools=$npools + if [[ $ostsn -gt $OSTCOUNT ]]; then + echo "request to use $ostsn OSTs in the pool, \ + using max available OSTCOUNT=$OSTCOUNT" + ostsn=$OSTCOUNT + fi + for (( n=0; n < $npools; n++ )); do + p=${pool}$n + if ! $DELETE_OLD_POOLS; then + log "request to not delete old pools: $FSNAME.$p exist?" + if ! check_pool_not_exist $FSNAME.$p; then + echo "Using existing $FSNAME.$p" + $LCTL pool_list $FSNAME.$p + continue + fi + fi + create_pool $FSNAME.$p $KEEP_POOLS || + error "create_pool $FSNAME.$p failed" + + local first=$(( (n * ostsn) % OSTCOUNT )) + local last=$(( (first + ostsn - 1) % OSTCOUNT )) + if [[ $first -le $last ]]; then + pool_add_targets $p $first $last || + error "pool_add_targets $p $first $last failed" + else + pool_add_targets $p $first $(( OSTCOUNT - 1 )) || + error "pool_add_targets $p $first \ + $(( OSTCOUNT - 1 )) failed" + pool_add_targets $p 0 $last || + error "pool_add_targets $p 0 $last failed" + fi + done } -# empty lustre filesystem may have empty directories lost+found and .lustre -is_empty_fs() { - # exclude .lustre & lost+found - [ $(find $1 -maxdepth 1 -name lost+found -o -name .lustre -prune -o \ - -print | wc -l) = 1 ] || return 1 - [ ! -d $1/lost+found ] || is_empty_dir $1/lost+found || return 1 - if [ $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.4.0) ]; then - # exclude .lustre/fid (LU-2780) - [ $(find $1/.lustre -maxdepth 1 -name fid -prune -o \ - -print | wc -l) = 1 ] || return 1 - else - [ ! -d $1/.lustre ] || is_empty_dir $1/.lustre || return 1 - fi - return 0 +set_pools_quota () { + local u + local o + local p + local i + local j + + [[ $ENABLE_QUOTA ]] || error "Required Pool Quotas: \ + $POOLS_QUOTA_USERS_SET, but ENABLE_QUOTA not set!" + + # POOLS_QUOTA_USERS_SET= + # "quota15_1:20M -- for all of the found pools + # quota15_2:1G:gpool0 + # quota15_3 -- for global limit only + # quota15_4:200M:gpool0 + # quota15_4:200M:gpool1" + + declare -a pq_userset=(${POOLS_QUOTA_USERS_SET="mpiuser"}) + declare -a pq_users + declare -A pq_limits + + for ((i=0; i<${#pq_userset[@]}; i++)); do + u=${pq_userset[i]%%:*} + o="" + # user gets no pool limits if + # POOLS_QUOTA_USERS_SET does not specify it + [[ ${pq_userset[i]} =~ : ]] && o=${pq_userset[i]##$u:} + pq_limits[$u]+=" $o" + done + pq_users=(${!pq_limits[@]}) + + declare -a opts + local pool + + for ((i=0; i<${#pq_users[@]}; i++)); do + u=${pq_users[i]} + # set to max limit (_u64) + $LFS setquota -u $u -B $((2**24 - 1))T $DIR + opts=(${pq_limits[$u]}) + for ((j=0; j<${#opts[@]}; j++)); do + p=${opts[j]##*:} + o=${opts[j]%%:*} + # Set limit for all existing pools if + # no pool specified + if [ $p == $o ]; then + p=$(list_pool $FSNAME | sed "s/$FSNAME.//") + echo "No pool specified for $u, + set limit $o for all existing pools" + fi + for pool in $p; do + $LFS setquota -u $u -B $o --pool $pool $DIR || + error "setquota -u $u -B $o \ + --pool $pool failed" + done + done + $LFS quota -uv $u --pool $DIR + done } check_and_setup_lustre() { @@ -5351,17 +5594,18 @@ check_and_setup_lustre() { export I_MOUNTED2=yes fi - if $do_check; then - # FIXME: what to do if check_config failed? - # i.e. if: - # 1) remote client has mounted other Lustre fs? - # 2) lustre is mounted on remote_clients atall ? - check_config_clients $MOUNT - init_facets_vars - init_param_vars + if $do_check; then + # FIXME: what to do if check_config failed? + # i.e. if: + # 1) remote client has mounted other Lustre fs? + # 2) lustre is mounted on remote_clients atall ? + check_config_clients $MOUNT + init_facets_vars + init_param_vars - set_default_debug_nodes $(comma_list $(nodes_list)) - fi + set_default_debug_nodes $(comma_list $(nodes_list)) + set_params_clients + fi if [ -z "$CLIENTONLY" -a $(lower $OSD_TRACK_DECLARES_LBUG) == 'yes' ]; then local facets="" @@ -5378,12 +5622,25 @@ check_and_setup_lustre() { fi fi + if [ -n "$fs_STRIPEPARAMS" ]; then + setstripe_getstripe $MOUNT $fs_STRIPEPARAMS + fi if $GSS_SK; then set_flavor_all null elif $GSS; then set_flavor_all $SEC fi + if $DELETE_OLD_POOLS; then + destroy_all_pools + fi + if [[ -n "$FS_POOL" ]]; then + create_pools $FS_POOL $FS_POOL_NOSTS + fi + + if [[ -n "$POOLS_QUOTA_USERS_SET" ]]; then + set_pools_quota + fi if [ "$ONLY" == "setup" ]; then exit 0 fi @@ -5414,60 +5671,17 @@ cleanup_and_setup_lustre() { check_and_setup_lustre } -# Get all of the server target devices from a given server node and type. -get_mnt_devs() { +# Run e2fsck on MDT or OST device. +run_e2fsck() { local node=$1 - local type=$2 - local devs - local dev - - if [ "$type" == ost ]; then - devs=$(get_osd_param $node "" mntdev) - else - devs=$(do_node $node $LCTL get_param -n osd-*.$FSNAME-M*.mntdev) - fi - for dev in $devs; do - case $dev in - *loop*) do_node $node "losetup $dev" | \ - sed -e "s/.*(//" -e "s/).*//" ;; - *) echo $dev ;; - esac - done -} - -# Get all of the server target devices. -get_svr_devs() { - local node - local i - - # Master MDS parameters used by lfsck - MDTNODE=$(facet_active_host $SINGLEMDS) - MDTDEV=$(echo $(get_mnt_devs $MDTNODE mdt) | awk '{print $1}') - - # MDT devices - i=0 - for node in $(mdts_nodes); do - MDTDEVS[i]=$(get_mnt_devs $node mdt) - i=$((i + 1)) - done - - # OST devices - i=0 - for node in $(osts_nodes); do - OSTDEVS[i]=$(get_mnt_devs $node ost) - i=$((i + 1)) - done -} - -# Run e2fsck on MDT or OST device. -run_e2fsck() { - local node=$1 - local target_dev=$2 - local extra_opts=$3 - local cmd="$E2FSCK -d -v -t -t -f $extra_opts $target_dev" - local log=$TMP/e2fsck.log - local rc=0 + local target_dev=$2 + local extra_opts=$3 + local cmd="$E2FSCK -d -v -t -t -f $extra_opts $target_dev" + local log=$TMP/e2fsck.log + local rc=0 + # turn on pfsck if it is supported + do_node $node $E2FSCK -h 2>&1 | grep -qw -- -m && cmd+=" -m8" echo $cmd do_node $node $cmd 2>&1 | tee $log rc=${PIPESTATUS[0]} @@ -5628,30 +5842,30 @@ check_and_cleanup_lustre() { # General functions wait_for_function () { - local quiet="" - - # suppress fn both stderr and stdout - if [ "$1" = "--quiet" ]; then - shift - quiet=" > /dev/null 2>&1" + local quiet="" - fi + # suppress fn both stderr and stdout + if [ "$1" = "--quiet" ]; then + shift + quiet=" > /dev/null 2>&1" + fi - local fn=$1 - local max=${2:-900} - local sleep=${3:-5} + local fn=$1 + local max=${2:-900} + local sleep=${3:-5} - local wait=0 + local wait=0 - while true; do + while true; do - eval $fn $quiet && return 0 + eval $fn $quiet && return 0 - wait=$((wait + sleep)) - [ $wait -lt $max ] || return 1 - echo waiting $fn, $((max - wait)) secs left ... - sleep $sleep - done + [ $wait -lt $max ] || return 1 + echo waiting $fn, $((max - wait)) secs left ... + wait=$((wait + sleep)) + [ $wait -gt $max ] && ((sleep -= wait - max)) + sleep $sleep + done } check_network() { @@ -5661,13 +5875,10 @@ check_network() { [ "$host" = "$HOSTNAME" ] && return 0 - echo "$(date +'%H:%M:%S (%s)') waiting for $host network $max secs ..." - if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep ; then - echo "Network not available!" + if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep; then + echo "$(date +'%H:%M:%S (%s)') waited for $host network ${max}s" exit 1 fi - - echo "$(date +'%H:%M:%S (%s)') network interface is UP" } no_dsh() { @@ -5785,10 +5996,6 @@ at_max_get() { at_get $1 at_max } -at_min_get() { - at_get $1 at_min -} - at_max_set() { local at_max=$1 shift @@ -5944,9 +6151,9 @@ cancel_lru_locks() { default_lru_size() { - NR_CPU=$(grep -c "processor" /proc/cpuinfo) - DEFAULT_LRU_SIZE=$((100 * NR_CPU)) - echo "$DEFAULT_LRU_SIZE" + local nr_cpu=$(grep -c "processor" /proc/cpuinfo) + + echo $((100 * nr_cpu)) } lru_resize_enable() @@ -5956,13 +6163,18 @@ lru_resize_enable() lru_resize_disable() { - lctl set_param ldlm.namespaces.*$1*.lru_size $(default_lru_size) + local dev=${1} + local lru_size=${2:-$(default_lru_size)} + + $LCTL set_param ldlm.namespaces.*$dev*.lru_size=$lru_size } flock_is_enabled() { + local mountpath=${1:-$MOUNT} local RC=0 - [ -z "$(mount | grep "$MOUNT.*flock" | grep -v noflock)" ] && RC=1 + + [ -z "$(mount | grep "$mountpath .*flock" | grep -v noflock)" ] && RC=1 return $RC } @@ -6007,19 +6219,19 @@ debug_size_restore() { } start_full_debug_logging() { - debugsave - debug_size_save + debugsave + debug_size_save - local FULLDEBUG=-1 - local DEBUG_SIZE=150 + local fulldebug=-1 + local debug_size=150 + local nodes=$(comma_list $(nodes_list)) - do_nodes $(comma_list $(nodes_list)) "$LCTL set_param debug_mb=$DEBUG_SIZE" - do_nodes $(comma_list $(nodes_list)) "$LCTL set_param debug=$FULLDEBUG;" + do_nodes $nodes "$LCTL set_param debug=$fulldebug debug_mb=$debug_size" } stop_full_debug_logging() { - debug_size_restore - debugrestore + debug_size_restore + debugrestore } # prints bash call stack @@ -6076,7 +6288,7 @@ report_error() { # usage: stack_trap arg sigspec # # stack_trap() behaves like bash's built-in trap, except that it "stacks" the -# command ``arg`` on top of previously defined commands for ``sigspec`` instead +# command "arg" on top of previously defined commands for "sigspec" instead # of overwriting them. # stacked traps are executed in reverse order of their registration # @@ -6084,7 +6296,7 @@ report_error() { stack_trap() { local arg="$1" - local sigspec="$2" + local sigspec="${2:-EXIT}" # Use "trap -p" to get the quoting right local old_trap="$(trap -p "$sigspec")" @@ -6109,9 +6321,14 @@ error_noexit() { exit_status () { local status=0 - local log=$TESTSUITELOG + local logs="$TESTSUITELOG $1" + + for log in $logs; do + if [ -f "$log" ]; then + grep -qw FAIL $log && status=1 + fi + done - [ -f "$log" ] && grep -qw FAIL $log && status=1 exit $status } @@ -6176,6 +6393,7 @@ skip_noexit() { [[ -n "$TESTSUITELOG" ]] && echo "$TESTSUITE: SKIP: $TESTNAME $@" >> $TESTSUITELOG || true + unset TESTNAME } skip() { @@ -6196,29 +6414,29 @@ build_test_filter() { fi done - [ "$EXCEPT$ALWAYS_EXCEPT" ] && \ - log "excepting tests: `echo $EXCEPT $ALWAYS_EXCEPT`" - [ "$EXCEPT_SLOW" ] && \ - log "skipping tests SLOW=no: `echo $EXCEPT_SLOW`" - for E in $EXCEPT; do - eval EXCEPT_${E}=true - done - for E in $ALWAYS_EXCEPT; do - eval EXCEPT_ALWAYS_${E}=true - done - for E in $EXCEPT_SLOW; do - eval EXCEPT_SLOW_${E}=true - done - for G in $GRANT_CHECK_LIST; do - eval GCHECK_ONLY_${G}=true - done + [ "$EXCEPT$ALWAYS_EXCEPT" ] && + log "excepting tests: `echo $EXCEPT $ALWAYS_EXCEPT`" + [ "$EXCEPT_SLOW" ] && + log "skipping tests SLOW=no: `echo $EXCEPT_SLOW`" + for E in $EXCEPT; do + eval EXCEPT_${E}=true + done + for E in $ALWAYS_EXCEPT; do + eval EXCEPT_ALWAYS_${E}=true + done + for E in $EXCEPT_SLOW; do + eval EXCEPT_SLOW_${E}=true + done + for G in $GRANT_CHECK_LIST; do + eval GCHECK_ONLY_${G}=true + done } basetest() { if [[ $1 = [a-z]* ]]; then echo $1 else - echo ${1%%[a-z]*} + echo ${1%%[a-zA-Z]*} fi } @@ -6226,59 +6444,63 @@ basetest() { export LAST_SKIPPED= export ALWAYS_SKIPPED= # -# Main entry into test-framework. This is called with the name and -# description of a test. The name is used to find the function to run +# Main entry into test-framework. This is called with the number and +# description of a test. The number is used to find the function to run # the test using "test_$name". # # This supports a variety of methods of specifying specific test to -# run or not run. These need to be documented... +# run or not run: +# - ONLY= env variable with space-separated list of test numbers to run +# - EXCEPT= env variable with space-separated list of test numbers to exclude # run_test() { assert_DIR - export base=$(basetest $1) - TESTNAME=test_$1 + local testnum=$1 + local testmsg=$2 + export base=$(basetest $testnum) + export TESTNAME=test_$testnum LAST_SKIPPED= ALWAYS_SKIPPED= # Check the EXCEPT, ALWAYS_EXCEPT and SLOW lists to see if we # need to skip the current test. If so, set the ALWAYS_SKIPPED flag. - local testname=EXCEPT_$1 - local testname_base=EXCEPT_$base - if [ ${!testname}x != x ]; then + local isexcept=EXCEPT_$testnum + local isexcept_base=EXCEPT_$base + if [ ${!isexcept}x != x ]; then ALWAYS_SKIPPED="y" - skip_message="skipping excluded test $1" - elif [ ${!testname_base}x != x ]; then + skip_message="skipping excluded test $testnum" + elif [ ${!isexcept_base}x != x ]; then ALWAYS_SKIPPED="y" - skip_message="skipping excluded test $1 (base $base)" + skip_message="skipping excluded test $testnum (base $base)" fi - testname=EXCEPT_ALWAYS_$1 - testname_base=EXCEPT_ALWAYS_$base - if [ ${!testname}x != x ]; then + isexcept=EXCEPT_ALWAYS_$testnum + isexcept_base=EXCEPT_ALWAYS_$base + if [ ${!isexcept}x != x ]; then ALWAYS_SKIPPED="y" - skip_message="skipping ALWAYS excluded test $1" - elif [ ${!testname_base}x != x ]; then + skip_message="skipping ALWAYS excluded test $testnum" + elif [ ${!isexcept_base}x != x ]; then ALWAYS_SKIPPED="y" - skip_message="skipping ALWAYS excluded test $1 (base $base)" + skip_message="skipping ALWAYS excluded test $testnum (base $base)" fi - testname=EXCEPT_SLOW_$1 - testname_base=EXCEPT_SLOW_$base - if [ ${!testname}x != x ]; then + isexcept=EXCEPT_SLOW_$testnum + isexcept_base=EXCEPT_SLOW_$base + if [ ${!isexcept}x != x ]; then ALWAYS_SKIPPED="y" - skip_message="skipping SLOW test $1" - elif [ ${!testname_base}x != x ]; then + skip_message="skipping SLOW test $testnum" + elif [ ${!isexcept_base}x != x ]; then ALWAYS_SKIPPED="y" - skip_message="skipping SLOW test $1 (base $base)" + skip_message="skipping SLOW test $testnum (base $base)" fi # If there are tests on the ONLY list, check if the current test # is on that list and, if so, check if the test is to be skipped # and if we are supposed to honor the skip lists. if [ -n "$ONLY" ]; then - testname=ONLY_$1 - testname_base=ONLY_$base - if [[ ${!testname}x != x || ${!testname_base}x != x ]]; then + local isonly=ONLY_$testnum + local isonly_base=ONLY_$base + if [[ ${!isonly}x != x || ${!isonly_base}x != x ]]; then if [[ -n "$ALWAYS_SKIPPED" && -n "$HONOR_EXCEPT" ]]; then LAST_SKIPPED="y" @@ -6288,7 +6510,7 @@ run_test() { [ -n "$LAST_SKIPPED" ] && echo "" && LAST_SKIPPED= ALWAYS_SKIPPED= - run_one_logged $1 "$2" + run_one_logged $testnum "$testmsg" return $? fi @@ -6303,10 +6525,9 @@ run_test() { skip_noexit "$skip_message" return 0 else - run_one_logged $1 "$2" + run_one_logged $testnum "$testmsg" return $? fi - } log() { @@ -6364,10 +6585,10 @@ check_mds() { } reset_fail_loc () { - echo -n "Resetting fail_loc on all nodes..." - do_nodes $(comma_list $(nodes_list)) "lctl set_param -n fail_loc=0 \ - fail_val=0 2>/dev/null" || true - echo done. + #echo -n "Resetting fail_loc on all nodes..." + do_nodes --quiet $(comma_list $(nodes_list)) \ + "lctl set_param -n fail_loc=0 fail_val=0 2>/dev/null" || true + #echo done. } @@ -6376,7 +6597,8 @@ reset_fail_loc () { # Also appends a timestamp and prepends the testsuite name. # -EQUALS="====================================================================================================" +# ======================================================== 15:06:12 (1624050372) +EQUALS="========================================================" banner() { msg="== ${TESTSUITE} $*" last=${msg: -1:1} @@ -6392,7 +6614,7 @@ check_dmesg_for_errors() { ldiskfs_check_descriptors: Checksum for group 0 failed\|\ group descriptors corrupted" - res=$(do_nodes $(comma_list $(nodes_list)) "dmesg" | grep "$errors") + res=$(do_nodes -q $(comma_list $(nodes_list)) "dmesg" | grep "$errors") [ -z "$res" ] && return 0 echo "Kernel error detected: $res" return 1 @@ -6406,10 +6628,7 @@ group descriptors corrupted" # run_one() { local testnum=$1 - local message=$2 - export tfile=f${testnum}.${TESTSUITE} - export tdir=d${testnum}.${TESTSUITE} - export TESTNAME=test_$testnum + local testmsg="$2" local SAVE_UMASK=`umask` umask 0022 @@ -6417,7 +6636,7 @@ run_one() { $SETUP fi - banner "test $testnum: $message" + banner "test $testnum: $testmsg" test_${testnum} || error "test_$testnum failed with $?" cd $SAVE_PWD reset_fail_loc @@ -6425,12 +6644,9 @@ run_one() { check_node_health check_dmesg_for_errors || error "Error in dmesg detected" if [ "$PARALLEL" != "yes" ]; then - ps auxww | grep -v grep | grep -q multiop && + ps auxww | grep -v grep | grep -q "multiop " && error "multiop still running" fi - unset TESTNAME - unset tdir - unset tfile umask $SAVE_UMASK $CLEANUP return 0 @@ -6443,49 +6659,72 @@ run_one() { # - test result is saved to data file # run_one_logged() { - local BEFORE=$(date +%s) - local TEST_ERROR - local name=${TESTSUITE}.test_${1}.test_log.$(hostname -s).log - local test_log=$LOGDIR/$name - local zfs_log_name=${TESTSUITE}.test_${1}.zfs_log - local zfs_debug_log=$LOGDIR/$zfs_log_name - rm -rf $LOGDIR/err - rm -rf $LOGDIR/ignore - rm -rf $LOGDIR/skip + local before=$SECONDS + local testnum=$1 + local testmsg=$2 + export tfile=f${testnum}.${TESTSUITE} + export tdir=d${testnum}.${TESTSUITE} + local test_log=$TESTLOG_PREFIX.$TESTNAME.test_log.$(hostname -s).log + local zfs_debug_log=$TESTLOG_PREFIX.$TESTNAME.zfs_log local SAVE_UMASK=$(umask) + local rc=0 umask 0022 + rm -f $LOGDIR/err $LOGDIR/ignore $LOGDIR/skip echo - log_sub_test_begin test_${1} - (run_one $1 "$2") 2>&1 | tee -i $test_log - local RC=${PIPESTATUS[0]} - - [ $RC -ne 0 ] && [ ! -f $LOGDIR/err ] && - echo "test_$1 returned $RC" | tee $LOGDIR/err - - duration=$(($(date +%s) - $BEFORE)) - pass "$1" "(${duration}s)" + # if ${ONLY_$testnum} set, repeat $ONLY_REPEAT times, otherwise once + local isonly=ONLY_$testnum + local repeat=${!isonly:+$ONLY_REPEAT} + + for testiter in $(seq ${repeat:-1}); do + local before_sub=$SECONDS + log_sub_test_begin $TESTNAME + + # remove temp files between repetitions to avoid test failures + [ -n "$append" -a -n "$DIR" -a -n "$tdir" -a -n "$tfile" ] && + rm -rvf $DIR/$tdir* $DIR/$tfile* + # loop around subshell so stack_trap EXIT triggers each time + (run_one $testnum "$testmsg") 2>&1 | tee -i $append $test_log + rc=${PIPESTATUS[0]} + local append=-a + local duration_sub=$((SECONDS - before_sub)) + local test_error + + [[ $rc != 0 && ! -f $LOGDIR/err ]] && + echo "$TESTNAME returned $rc" | tee $LOGDIR/err + + if [[ -f $LOGDIR/err ]]; then + test_error=$(cat $LOGDIR/err) + TEST_STATUS="FAIL" + elif [[ -f $LOGDIR/ignore ]]; then + test_error=$(cat $LOGDIR/ignore) + elif [[ -f $LOGDIR/skip ]]; then + test_error=$(cat $LOGDIR/skip) + TEST_STATUS="SKIP" + else + TEST_STATUS="PASS" + fi - if [[ -f $LOGDIR/err ]]; then - TEST_ERROR=$(cat $LOGDIR/err) - elif [[ -f $LOGDIR/ignore ]]; then - TEST_ERROR=$(cat $LOGDIR/ignore) - elif [[ -f $LOGDIR/skip ]]; then - TEST_ERROR=$(cat $LOGDIR/skip) - fi - log_sub_test_end $TEST_STATUS $duration "$RC" "$TEST_ERROR" + pass "$testnum" "(${duration_sub}s)" + log_sub_test_end $TEST_STATUS $duration_sub "$rc" "$test_error" + [[ $rc != 0 ]] && break + done - if [[ "$TEST_STATUS" != "SKIP" ]] && [[ -f $TF_SKIP ]]; then + if [[ "$TEST_STATUS" != "SKIP" && -f $TF_SKIP ]]; then rm -f $TF_SKIP fi if [ -f $LOGDIR/err ]; then log_zfs_info "$zfs_debug_log" - $FAIL_ON_ERROR && exit $RC + $FAIL_ON_ERROR && exit $rc fi umask $SAVE_UMASK + unset TESTNAME + unset tdir + unset tfile + return 0 } @@ -6498,52 +6737,69 @@ skip_logged(){ log_sub_test_end "SKIP" "0" "0" "$@" } -canonical_path() { - (cd $(dirname $1); echo $PWD/$(basename $1)) +grant_from_clients() { + local nodes="$1" + + # get client grant + do_nodes $nodes "$LCTL get_param -n osc.${FSNAME}-*.cur_*grant_bytes" | + calc_sum } +grant_from_servers() { + local nodes="$1" + + # get server grant + # which is tot_granted less grant_precreate + do_nodes $nodes "$LCTL get_param obdfilter.${FSNAME}-OST*.tot_granted" \ + " obdfilter.${FSNAME}-OST*.tot_pending" \ + " obdfilter.${FSNAME}-OST*.grant_precreate" | + tr '=' ' ' | awk '/tot_granted/{ total += $2 }; + /tot_pending/{ total -= $2 }; + /grant_precreate/{ total -= $2 }; + END { printf("%0.0f", total) }' +} check_grant() { export base=$(basetest $1) [ "$CHECK_GRANT" == "no" ] && return 0 - testnamebase=GCHECK_ONLY_${base} - testname=GCHECK_ONLY_$1 - [ ${!testnamebase}x == x -a ${!testname}x == x ] && return 0 + local isonly_base=GCHECK_ONLY_${base} + local isonly=GCHECK_ONLY_$1 + [ ${!isonly_base}x == x -a ${!isonly}x == x ] && return 0 echo -n "checking grant......" + local osts=$(comma_list $(osts_nodes)) local clients=$CLIENTS [ -z "$clients" ] && clients=$(hostname) # sync all the data and make sure no pending data on server do_nodes $clients sync - clients_up # initiate all idling connections + do_nodes $clients $LFS df # initiate all idling connections # get client grant - client_grant=$(do_nodes $clients \ - "$LCTL get_param -n osc.${FSNAME}-*.cur_*grant_bytes" | - awk '{ total += $1 } END { printf("%0.0f", total) }') + cli_grant=$(grant_from_clients $clients) # get server grant # which is tot_granted less grant_precreate - server_grant=$(do_nodes $(comma_list $(osts_nodes)) \ - "$LCTL get_param "\ - "obdfilter.${FSNAME}-OST*.{tot_granted,tot_pending,grant_precreate}" | - sed 's/=/ /'| awk '/tot_granted/{ total += $2 }; - /tot_pending/{ total -= $2 }; - /grant_precreate/{ total -= $2 }; - END { printf("%0.0f", total) }') + srv_grant=$(grant_from_servers $osts) + count=0 # check whether client grant == server grant - if [[ $client_grant -ne $server_grant ]]; then + while [[ $cli_grant != $srv_grant && count++ -lt 30 ]]; do + echo "wait for client:$cli_grant == server:$srv_grant" + sleep 1 + cli_grant=$(grant_from_clients $clients) + srv_grant=$(grant_from_servers $osts) + done + if [[ $cli_grant -ne $srv_grant ]]; then do_nodes $(comma_list $(osts_nodes)) \ "$LCTL get_param obdfilter.${FSNAME}-OST*.tot*" \ - "obdfilter.${FSNAME}-OST*.grant_*" + "obdfilter.${FSNAME}-OST*.grant_*" do_nodes $clients "$LCTL get_param osc.${FSNAME}-*.cur_*_bytes" - error "failed: client:${client_grant} server: ${server_grant}." + error "failed grant check: client:$cli_grant server:$srv_grant" else - echo "pass: client:${client_grant} server: ${server_grant}" + echo "pass grant check: client:$cli_grant server:$srv_grant" fi } @@ -6566,10 +6822,20 @@ ostuuid_from_index() } ostname_from_index() { - local uuid=$(ostuuid_from_index $1) + local uuid=$(ostuuid_from_index $1 $2) echo ${uuid/_UUID/} } +mdtname_from_index() { + local uuid=$(mdtuuid_from_index $1) + echo ${uuid/_UUID/} +} + +mdssize_from_index () { + local mdt=$(mdtname_from_index $2) + $LFS df $1 | grep $mdt | awk '{ print $2 }' +} + index_from_ostuuid() { $LFS osts $2 | sed -ne "/${1}/s/\(.*\): .* .*$/\1/p" @@ -6590,7 +6856,7 @@ host_id() { # Description: # Returns list of ip addresses for each interface local_addr_list() { - ip addr | awk '/inet\ / {print $2}' | awk -F\/ '{print $1}' + ip addr | awk '/inet / {print $2}' | awk -F/ '{print $1}' } is_local_addr() { @@ -6714,11 +6980,6 @@ osts_nodes () { echo -n $(facets_nodes $(get_facets OST)) } -# Get all of the active AGT (HSM agent) nodes. -agts_nodes () { - echo -n $(facets_nodes $(get_facets AGT)) -} - # Get all of the client nodes and active server nodes. nodes_list () { local nodes=$HOSTNAME @@ -6858,12 +7119,6 @@ get_node_count() { echo $nodes | wc -w || true } -mixed_ost_devs () { - local nodes=$(osts_nodes) - local osscount=$(get_node_count "$nodes") - [ ! "$OSTCOUNT" = "$osscount" ] -} - mixed_mdt_devs () { local nodes=$(mdts_nodes) local mdtcount=$(get_node_count "$nodes") @@ -6888,23 +7143,6 @@ get_stripe () { rm -f $file } -setstripe_nfsserver () { - local dir=$1 - local nfsexportdir=$2 - shift - shift - - local -a nfsexport=($(awk '"'$dir'" ~ $2 && $3 ~ "nfs" && $2 != "/" \ - { print $1 }' /proc/mounts | cut -f 1 -d :)) - - # check that only one nfs mounted - [[ -z $nfsexport ]] && echo "$dir is not nfs mounted" && return 1 - (( ${#nfsexport[@]} == 1 )) || - error "several nfs mounts found for $dir: ${nfsexport[@]} !" - - do_nodev ${nfsexport[0]} lfs setstripe $nfsexportdir "$@" -} - # Check and add a test group. add_group() { local group_id=$1 @@ -6963,35 +7201,46 @@ add_user() { } check_runas_id_ret() { - local myRC=0 - local myRUNAS_UID=$1 - local myRUNAS_GID=$2 - shift 2 - local myRUNAS=$@ - if [ -z "$myRUNAS" ]; then - error_exit "myRUNAS command must be specified for check_runas_id" - fi - if $GSS_KRB5; then - $myRUNAS krb5_login.sh || \ - error "Failed to refresh Kerberos V5 TGT for UID $myRUNAS_ID." - fi - mkdir $DIR/d0_runas_test - chmod 0755 $DIR - chown $myRUNAS_UID:$myRUNAS_GID $DIR/d0_runas_test - $myRUNAS touch $DIR/d0_runas_test/f$$ || myRC=$? - rm -rf $DIR/d0_runas_test - return $myRC + local myRC=0 + local myRUNAS_UID=$1 + local myRUNAS_GID=$2 + shift 2 + local myRUNAS=$@ + + if [ -z "$myRUNAS" ]; then + error_exit "check_runas_id_ret requires myRUNAS argument" + fi + + $myRUNAS true || + error "Unable to execute $myRUNAS" + + id $myRUNAS_UID > /dev/null || + error "Invalid RUNAS_ID $myRUNAS_UID. Please set RUNAS_ID to " \ + "some UID which exists on MDS and client or add user " \ + "$myRUNAS_UID:$myRUNAS_GID on these nodes." + + if $GSS_KRB5; then + $myRUNAS krb5_login.sh || + error "Failed to refresh krb5 TGT for UID $myRUNAS_ID." + fi + mkdir $DIR/d0_runas_test + chmod 0755 $DIR + chown $myRUNAS_UID:$myRUNAS_GID $DIR/d0_runas_test + $myRUNAS -u $myRUNAS_UID -g $myRUNAS_GID touch $DIR/d0_runas_test/f$$ || + myRC=$? + rm -rf $DIR/d0_runas_test + return $myRC } check_runas_id() { - local myRUNAS_UID=$1 - local myRUNAS_GID=$2 - shift 2 - local myRUNAS=$@ - check_runas_id_ret $myRUNAS_UID $myRUNAS_GID $myRUNAS || \ - error "unable to write to $DIR/d0_runas_test as UID $myRUNAS_UID. - Please set RUNAS_ID to some UID which exists on MDS and client or - add user $myRUNAS_UID:$myRUNAS_GID on these nodes." + local myRUNAS_UID=$1 + local myRUNAS_GID=$2 + shift 2 + local myRUNAS=$@ + + check_runas_id_ret $myRUNAS_UID $myRUNAS_GID $myRUNAS || \ + error "unable to write to $DIR/d0_runas_test as " \ + "UID $myRUNAS_UID." } # obtain the UID/GID for MPI_USER @@ -7053,16 +7302,16 @@ multiop_bg_pause() { } do_and_time () { - local cmd=$1 - local rc - - SECONDS=0 - eval '$cmd' + local cmd="$1" + local start + local rc - [ ${PIPESTATUS[0]} -eq 0 ] || rc=1 + start=$SECONDS + eval '$cmd' + [ ${PIPESTATUS[0]} -eq 0 ] || rc=1 - echo $SECONDS - return $rc + echo $((SECONDS - start)) + return $rc } inodes_available () { @@ -7131,19 +7380,18 @@ restore_lustre_params() { check_node_health() { local nodes=${1:-$(comma_list $(nodes_list))} - - for node in ${nodes//,/ }; do - check_network "$node" 5 - if [ $? -eq 0 ]; then - do_node $node "rc=0; - val=\\\$($LCTL get_param -n catastrophe 2>&1); - if [[ \\\$? -eq 0 && \\\$val -ne 0 ]]; then - echo \\\$(hostname -s): \\\$val; - rc=\\\$val; - fi; - exit \\\$rc" || error "$node:LBUG/LASSERT detected" - fi - done + local health=$TMP/node_health.$$ + + do_nodes -q $nodes "$LCTL get_param catastrophe 2>&1" | tee $health | + grep "catastrophe=1" && error "LBUG/LASSERT detected" + # Only check/report network health if get_param isn't reported, since + # *clearly* the network is working if get_param returned something. + if (( $(grep -c catastro $health) != $(wc -w <<< ${nodes//,/ }) )); then + for node in ${nodes//,/}; do + check_network $node 5 + done + fi + rm -f $health } mdsrate_cleanup () { @@ -7154,11 +7402,6 @@ mdsrate_cleanup () { fi } -delayed_recovery_enabled () { - local var=${SINGLEMDS}_svc - do_facet $SINGLEMDS lctl get_param -n mdd.${!var}.stale_export_age > /dev/null 2>&1 -} - ######################## convert_facet2label() { @@ -7183,20 +7426,6 @@ get_clientosc_proc_path() { echo "${1}-osc-[-0-9a-f]*" } -# If the 2.0 MDS was mounted on 1.8 device, then the OSC and LOV names -# used by MDT would not be changed. -# mdt lov: fsname-mdtlov -# mdt osc: fsname-OSTXXXX-osc -mds_on_old_device() { - local mds=${1:-"$SINGLEMDS"} - - if [ $(lustre_version_code $mds) -gt $(version_code 1.9.0) ]; then - do_facet $mds "lctl list_param osc.$FSNAME-OST*-osc \ - > /dev/null 2>&1" && return 0 - fi - return 1 -} - get_mdtosc_proc_path() { local mds_facet=$1 local ost_label=${2:-"*OST*"} @@ -7227,48 +7456,49 @@ get_osc_import_name() { } _wait_import_state () { - local expected=$1 - local CONN_PROC=$2 - local maxtime=${3:-$(max_recovery_time)} - local error_on_failure=${4:-1} - local CONN_STATE - local i=0 + local expected="$1" + local CONN_PROC="$2" + local maxtime=${3:-$(max_recovery_time)} + local err_on_fail=${4:-1} + local CONN_STATE + local i=0 CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq) - while ! echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do - if [ "${expected}" == "DISCONN" ]; then - # for disconn we can check after proc entry is removed - [ "x${CONN_STATE}" == "x" ] && return 0 - # with AT enabled, we can have connect request timeout near of - # reconnect timeout and test can't see real disconnect - [ "${CONN_STATE}" == "CONNECTING" ] && return 0 - fi - if [ $i -ge $maxtime ]; then - [ $error_on_failure -ne 0 ] && \ - error "can't put import for $CONN_PROC into ${expected}" \ - "state after $i sec, have ${CONN_STATE}" - return 1 - fi - sleep 1 - # Add uniq for multi-mount case - CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq) - i=$(($i + 1)) - done + while ! echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do + if [[ "${expected}" == "DISCONN" ]]; then + # for disconn we can check after proc entry is removed + [[ -z "${CONN_STATE}" ]] && return 0 + # with AT, we can have connect request timeout near + # reconnect timeout and test can't see real disconnect + [[ "${CONN_STATE}" == "CONNECTING" ]] && return 0 + fi + if (( $i >= $maxtime )); then + (( $err_on_fail != 0 )) && + error "can't put import for $CONN_PROC into ${expected} state after $i sec, have ${CONN_STATE}" + return 1 + fi + sleep 1 + # Add uniq for multi-mount case + CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | + cut -f2 | uniq) + i=$((i + 1)) + done - log "$CONN_PROC in ${CONN_STATE} state after $i sec" - return 0 + log "$CONN_PROC in ${CONN_STATE} state after $i sec" + return 0 } wait_import_state() { - local state=$1 - local params=$2 - local maxtime=${3:-$(max_recovery_time)} - local error_on_failure=${4:-1} - local param - - for param in ${params//,/ }; do - _wait_import_state $state $param $maxtime $error_on_failure || return - done + local expected="$1" + local params="$2" + local maxtime=${3:-$(max_recovery_time)} + local err_on_fail=${4:-1} + local param + + for param in ${params//,/ }; do + _wait_import_state "$expected" "$param" $maxtime $err_on_fail || + return + done } wait_import_state_mount() { @@ -7276,7 +7506,7 @@ wait_import_state_mount() { return 0 fi - wait_import_state $* + wait_import_state "$@" } # One client request could be timed out because server was not ready @@ -7356,8 +7586,9 @@ _wait_osc_import_state() { params=$param fi + local plist=$(comma_list $params) if ! do_rpc_nodes "$(facet_active_host $facet)" \ - wait_import_state $expected "$params" $maxtime; then + wait_import_state $expected $plist $maxtime; then error "$facet: import is not in $expected state after $maxtime" return 1 fi @@ -7412,8 +7643,9 @@ _wait_mgc_import_state() { params=$($LCTL list_param $param 2>/dev/null || true) done fi + local plist=$(comma_list $params) if ! do_rpc_nodes "$(facet_active_host $facet)" \ - wait_import_state $expected "$params" $maxtime \ + wait_import_state $expected $plist $maxtime \ $error_on_failure; then if [ $error_on_failure -ne 0 ]; then error "import is not in ${expected} state" @@ -7473,11 +7705,10 @@ do_rpc_nodes () { } wait_clients_import_state () { - local list=$1 - local facet=$2 - local expected=$3 - - local facets=$facet + local list="$1" + local facet="$2" + local expected="$3" + local facets="$facet" if [ "$FAILURE_MODE" = HARD ]; then facets=$(facets_on_host $(facet_active_host $facet)) @@ -7488,11 +7719,11 @@ wait_clients_import_state () { local proc_path case $facet in ost* ) proc_path="osc.$(get_clientosc_proc_path \ - $label).ost_server_uuid" ;; + $label).ost_server_uuid" ;; mds* ) proc_path="mdc.$(get_clientmdc_proc_path \ - $label).mds_server_uuid" ;; + $label).mds_server_uuid" ;; mgs* ) proc_path="mgc.$(get_clientmgc_proc_path \ - $label).mgs_server_uuid" ;; + $label).mgs_server_uuid" ;; *) error "unknown facet!" ;; esac @@ -7590,6 +7821,7 @@ check_pool_not_exist() { create_pool() { local fsname=${1%%.*} local poolname=${1##$fsname.} + local keep_pools=${2:-false} stack_trap "destroy_test_pools $fsname" EXIT do_facet mgs lctl pool_new $1 @@ -7608,7 +7840,7 @@ create_pool() { wait_update $HOSTNAME "lctl get_param -n lov.$fsname-*.pools.$poolname \ 2>/dev/null || echo foo" "" || error "pool_new failed $1" - add_pool_to_list $1 + $keep_pools || add_pool_to_list $1 return $RC } @@ -7626,10 +7858,18 @@ remove_pool_from_list () { local poolname=${1##$fsname.} local listvar=${fsname}_CREATED_POOLS - local temp=${listvar}=$(exclude_items_from_list ${!listvar} $poolname) + local temp=${listvar}=$(exclude_items_from_list "${!listvar}" $poolname) eval export $temp } +# cleanup all pools exist on $FSNAME +destroy_all_pools () { + local i + for i in $(list_pool $FSNAME); do + destroy_pool $i + done +} + destroy_pool_int() { local ost local OSTS=$(list_pool $1) @@ -7650,8 +7890,7 @@ destroy_pool() { local RC - check_pool_not_exist $fsname.$poolname - [[ $? -eq 0 ]] && return 0 + check_pool_not_exist $fsname.$poolname && return 0 || true destroy_pool_int $fsname.$poolname RC=$? @@ -7692,39 +7931,65 @@ destroy_test_pools () { } gather_logs () { - local list=$1 + local list=$1 - local ts=$(date +%s) - local docp=true + local ts=$(date +%s) + local docp=true - if [[ ! -f "$YAML_LOG" ]]; then - # init_logging is not performed before gather_logs, - # so the $LOGDIR needs to be checked here - check_shared_dir $LOGDIR && touch $LOGDIR/shared - fi + if [[ ! -f "$YAML_LOG" ]]; then + # init_logging is not performed before gather_logs, + # so the $LOGDIR needs to be checked here + check_shared_dir $LOGDIR && touch $LOGDIR/shared + fi - [ -f $LOGDIR/shared ] && docp=false + [ -f $LOGDIR/shared ] && docp=false - # dump lustre logs, dmesg + # dump lustre logs, dmesg, and journal if GSS_SK=true - prefix="$TESTLOG_PREFIX.$TESTNAME" - suffix="$ts.log" - echo "Dumping lctl log to ${prefix}.*.${suffix}" + prefix="$TESTLOG_PREFIX.$TESTNAME" + suffix="$ts.log" + echo "Dumping lctl log to ${prefix}.*.${suffix}" - if [ -n "$CLIENTONLY" -o "$PDSH" == "no_dsh" ]; then - echo "Dumping logs only on local client." - $LCTL dk > ${prefix}.debug_log.$(hostname -s).${suffix} - dmesg > ${prefix}.dmesg.$(hostname -s).${suffix} - return - fi + if [ -n "$CLIENTONLY" -o "$PDSH" == "no_dsh" ]; then + echo "Dumping logs only on local client." + $LCTL dk > ${prefix}.debug_log.$(hostname -s).${suffix} + dmesg > ${prefix}.dmesg.$(hostname -s).${suffix} + [ "$SHARED_KEY" = true ] && find $SK_PATH -name '*.key' -exec \ + lgss_sk -r {} \; &> \ + ${prefix}.ssk_keys.$(hostname -s).${suffix} + [ "$SHARED_KEY" = true ] && lctl get_param 'nodemap.*.*' > \ + ${prefix}.nodemaps.$(hostname -s).${suffix} + [ "$GSS_SK" = true ] && keyctl show > \ + ${prefix}.keyring.$(hostname -s).${suffix} + [ "$GSS_SK" = true ] && journalctl -a > \ + ${prefix}.journal.$(hostname -s).${suffix} + return + fi + + do_nodesv $list \ + "$LCTL dk > ${prefix}.debug_log.\\\$(hostname -s).${suffix}; + dmesg > ${prefix}.dmesg.\\\$(hostname -s).${suffix}" + if [ "$SHARED_KEY" = true ]; then + do_nodesv $list "find $SK_PATH -name '*.key' -exec \ + lgss_sk -r {} \; &> \ + ${prefix}.ssk_keys.\\\$(hostname -s).${suffix}" + do_facet mds1 "lctl get_param 'nodemap.*.*' > \ + ${prefix}.nodemaps.\\\$(hostname -s).${suffix}" + fi + if [ "$GSS_SK" = true ]; then + do_nodesv $list "keyctl show > \ + ${prefix}.keyring.\\\$(hostname -s).${suffix}" + do_nodesv $list "journalctl -a > \ + ${prefix}.journal.\\\$(hostname -s).${suffix}" + fi - do_nodesv $list \ - "$LCTL dk > ${prefix}.debug_log.\\\$(hostname -s).${suffix}; - dmesg > ${prefix}.dmesg.\\\$(hostname -s).${suffix}" + if [ ! -f $LOGDIR/shared ]; then + local remote_nodes=$(exclude_items_from_list $list $HOSTNAME) - if [ ! -f $LOGDIR/shared ]; then - do_nodes $list rsync -az "${prefix}.*.${suffix}" $HOSTNAME:$LOGDIR - fi + for node in ${remote_nodes//,/ }; do + rsync -az -e ssh $node:${prefix}.'*'.${suffix} $LOGDIR & + done + fi } do_ls () { @@ -8237,13 +8502,10 @@ init_logging() { umask $save_umask - # If modules are not yet loaded then older "lctl lustre_build_version" - # will fail. Use lctl build version instead. - log "Client: $($LCTL lustre_build_version)" - log "MDS: $(do_facet $SINGLEMDS $LCTL lustre_build_version 2>/dev/null|| - do_facet $SINGLEMDS $LCTL --version)" - log "OSS: $(do_facet ost1 $LCTL lustre_build_version 2> /dev/null || - do_facet ost1 $LCTL --version)" + # log actual client and server versions if needed for debugging + log "Client: $(lustre_build_version client)" + log "MDS: $(lustre_build_version mds1)" + log "OSS: $(lustre_build_version ost1)" } log_test() { @@ -8296,94 +8558,6 @@ run_llverfs() llverfs $partial_arg $llverfs_opts $dir } -#Remove objects from OST -remove_ost_objects() { - local facet=$1 - local ostdev=$2 - local group=$3 - shift 3 - local objids="$@" - local mntpt=$(facet_mntpt $facet) - local opts=$OST_MOUNT_OPTS - local i - local rc - - echo "removing objects from $ostdev on $facet: $objids" - if ! test -b $ostdev; then - opts=$(csa_add "$opts" -o loop) - fi - mount -t $(facet_fstype $facet) $opts $ostdev $mntpt || - return $? - rc=0 - for i in $objids; do - rm $mntpt/O/$group/d$((i % 32))/$i || { rc=$?; break; } - done - umount -f $mntpt || return $? - return $rc -} - -#Remove files from MDT -remove_mdt_files() { - local facet=$1 - local mdtdev=$2 - shift 2 - local files="$@" - local mntpt=$(facet_mntpt $facet) - local opts=$MDS_MOUNT_OPTS - - echo "removing files from $mdtdev on $facet: $files" - if [ $(facet_fstype $facet) == ldiskfs ] && - ! do_facet $facet test -b $mdtdev; then - opts=$(csa_add "$opts" -o loop) - fi - mount -t $(facet_fstype $facet) $opts $mdtdev $mntpt || - return $? - rc=0 - for f in $files; do - rm $mntpt/ROOT/$f || { rc=$?; break; } - done - umount -f $mntpt || return $? - return $rc -} - -duplicate_mdt_files() { - local facet=$1 - local mdtdev=$2 - shift 2 - local files="$@" - local mntpt=$(facet_mntpt $facet) - local opts=$MDS_MOUNT_OPTS - - echo "duplicating files on $mdtdev on $facet: $files" - mkdir -p $mntpt || return $? - if [ $(facet_fstype $facet) == ldiskfs ] && - ! do_facet $facet test -b $mdtdev; then - opts=$(csa_add "$opts" -o loop) - fi - mount -t $(facet_fstype $facet) $opts $mdtdev $mntpt || - return $? - - do_umount() { - trap 0 - popd > /dev/null - rm $tmp - umount -f $mntpt - } - trap do_umount EXIT - - tmp=$(mktemp $TMP/setfattr.XXXXXXXXXX) - pushd $mntpt/ROOT > /dev/null || return $? - rc=0 - for f in $files; do - touch $f.bad || return $? - getfattr -n trusted.lov $f | sed "s#$f#&.bad#" > $tmp - rc=${PIPESTATUS[0]} - [ $rc -eq 0 ] || return $rc - setfattr --restore $tmp || return $? - done - do_umount -} - run_sgpdd () { local devs=${1//,/ } shift @@ -8471,22 +8645,11 @@ get_block_count() { echo -n ${count:-0} } -# Get the block size of the filesystem. -get_block_size() { - local facet=$1 - local device=$2 - local size - - [ -z "$CLIENTONLY" ] && size=$(do_facet $facet "$DUMPE2FS -h $device 2>&1" | - awk '/^Block size:/ {print $3}') - echo -n ${size:-0} -} - # Check whether the "ea_inode" feature is enabled or not, to allow # ldiskfs xattrs over one block in size. Allow both the historical # Lustre feature name (large_xattr) and the upstream name (ea_inode). large_xattr_enabled() { - [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 0 + [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 1 local mds_dev=$(mdsdevname ${SINGLEMDS//mds/}) @@ -8497,20 +8660,7 @@ large_xattr_enabled() { # Get the maximum xattr size supported by the filesystem. max_xattr_size() { - local size - - if large_xattr_enabled; then - size=$($LCTL get_param -n llite.*.max_easize) - else - local mds_dev=$(mdsdevname ${SINGLEMDS//mds/}) - local block_size=$(get_block_size $SINGLEMDS $mds_dev) - - # maximum xattr size = size of block - size of header - - # size of 1 entry - 4 null bytes - size=$((block_size - 32 - 32 - 4)) - fi - - echo $size + $LCTL get_param -n llite.*.max_easize } # Dump the value of the named xattr from a file. @@ -8550,7 +8700,7 @@ mds_backup_restore() { local rcmd="do_facet $facet" local metaea=${TMP}/backup_restore.ea local metadata=${TMP}/backup_restore.tgz - local opts=${MDS_MOUNT_OPTS} + local opts=${MDS_MOUNT_FS_OPTS} local svc=${facet}_svc if ! ${rcmd} test -b ${devname}; then @@ -8607,7 +8757,7 @@ mds_remove_ois() { local devname=$(mdsdevname $(facet_number $facet)) local mntpt=$(facet_mntpt brpt) local rcmd="do_facet $facet" - local opts=${MDS_MOUNT_OPTS} + local opts=${MDS_MOUNT_FS_OPTS} if ! ${rcmd} test -b ${devname}; then opts=$(csa_add "$opts" -o loop) @@ -8649,16 +8799,19 @@ generate_logname() { test_mkdir() { local path local p_option + local hash_type + local hash_name=("all_char" "fnv_1a_64" "crush") local dirstripe_count=${DIRSTRIPE_COUNT:-"2"} local dirstripe_index=${DIRSTRIPE_INDEX:-$((base % $MDSCOUNT))} local OPTIND=1 - while getopts "c:i:p" opt; do + while getopts "c:H:i:p" opt; do case $opt in c) dirstripe_count=$OPTARG;; + H) hash_type=$OPTARG;; i) dirstripe_index=$OPTARG;; p) p_option="-p";; - \?) error "only support -i -c -p";; + \?) error "only support -c -H -i -p";; esac done @@ -8666,9 +8819,8 @@ test_mkdir() { [ $# -eq 1 ] || error "Only creating single directory is supported" path="$*" + local parent=$(dirname $path) if [ "$p_option" == "-p" ]; then - local parent=$(dirname $path) - [ -d $path ] && return 0 if [ ! -d ${parent} ]; then mkdir -p ${parent} || @@ -8676,7 +8828,7 @@ test_mkdir() { fi fi - if [ $MDSCOUNT -le 1 ]; then + if [ $MDSCOUNT -le 1 ] || ! is_lustre ${parent}; then mkdir $path || error "mkdir '$path' failed" else local mdt_index @@ -8687,6 +8839,10 @@ test_mkdir() { mdt_index=$dirstripe_index fi + # randomly choose hash type + [ -z "$hash_type" ] && + hash_type=${hash_name[$((RANDOM % ${#hash_name[@]}))]} + if (($MDS1_VERSION >= $(version_code 2.8.0))); then if [ $dirstripe_count -eq -1 ]; then dirstripe_count=$((RANDOM % MDSCOUNT + 1)) @@ -8695,9 +8851,9 @@ test_mkdir() { dirstripe_count=1 fi - echo "striped dir -i$mdt_index -c$dirstripe_count $path" - $LFS mkdir -i$mdt_index -c$dirstripe_count $path || - error "mkdir -i $mdt_index -c$dirstripe_count $path failed" + echo "striped dir -i$mdt_index -c$dirstripe_count -H $hash_type $path" + $LFS mkdir -i$mdt_index -c$dirstripe_count -H $hash_type $path || + error "mkdir -i $mdt_index -c$dirstripe_count -H $hash_type $path failed" fi } @@ -8724,7 +8880,7 @@ check_mount_and_prep() is_mounted $MOUNT || setupall rm -rf $DIR/[df][0-9]* || error "Fail to cleanup the env!" - mkdir $DIR/$tdir || error "Fail to mkdir $DIR/$tdir." + mkdir_on_mdt0 $DIR/$tdir || error "Fail to mkdir $DIR/$tdir." for idx in $(seq $MDSCOUNT); do local name="MDT$(printf '%04x' $((idx - 1)))" rm -rf $MOUNT/.lustre/lost+found/$name/* @@ -8751,7 +8907,7 @@ check_file_in_pool() local file=$1 local pool=$2 local tlist="$3" - local res=$($GETSTRIPE $file | grep 0x | cut -f2) + local res=$($LFS getstripe $file | grep 0x | cut -f2) for i in $res do for t in $tlist ; do @@ -8790,8 +8946,18 @@ pool_add_targets() { fi local t=$(for i in $list; do printf "$FSNAME-OST%04x_UUID " $i; done) + local tg=$(for i in $list; + do printf -- "-e $FSNAME-OST%04x_UUID " $i; done) + local firstx=$(printf "%04x" $first) + local lastx=$(printf "%04x" $last) + do_facet mgs $LCTL pool_add \ - $FSNAME.$pool $FSNAME-OST[$first-$last/$step] + $FSNAME.$pool $FSNAME-OST[$firstx-$lastx/$step] + # ignore EEXIST(17) + if (( $? != 0 && $? != 17 )); then + error_noexit "pool_add $FSNAME-OST[$firstx-$lastx/$step] failed" + return 3 + fi # wait for OSTs to be added to the pool for mds_id in $(seq $MDSCOUNT); do @@ -8799,23 +8965,16 @@ pool_add_targets() { local lodname=$FSNAME-MDT$(printf "%04x" $mdt_id)-mdtlov wait_update_facet mds$mds_id \ "lctl get_param -n lod.$lodname.pools.$pool | - sort -u | tr '\n' ' ' " "$t" || { + grep $tg | sort -u | tr '\n' ' '" "$t" || { error_noexit "mds$mds_id: Add to pool failed" - return 3 + return 2 } done - wait_update $HOSTNAME "lctl get_param -n lov.$FSNAME-*.pools.$pool \ - | sort -u | tr '\n' ' ' " "$t" || { + wait_update $HOSTNAME "lctl get_param -n lov.$FSNAME-*.pools.$pool | + grep $tg | sort -u | tr '\n' ' ' " "$t" || { error_noexit "Add to pool failed" return 1 } - local lfscount=$($LFS pool_list $FSNAME.$pool | grep -c "\-OST") - local addcount=$(((last - first) / step + 1)) - [ $lfscount -eq $addcount ] || { - error_noexit "lfs pool_list bad ost count" \ - "$lfscount != $addcount" - return 2 - } } pool_set_dir() { @@ -8823,7 +8982,7 @@ pool_set_dir() { local tdir=$2 echo "Setting pool on directory $tdir" - $SETSTRIPE -c 2 -p $pool $tdir && return 0 + $LFS setstripe -c 2 -p $pool $tdir && return 0 error_noexit "Cannot set pool $pool to $tdir" return 1 @@ -8834,7 +8993,7 @@ pool_check_dir() { local tdir=$2 echo "Checking pool on directory $tdir" - local res=$($GETSTRIPE --pool $tdir | sed "s/\s*$//") + local res=$($LFS getstripe --pool $tdir | sed "s/\s*$//") [ "$res" = "$pool" ] && return 0 error_noexit "Pool on '$tdir' is '$res', not '$pool'" @@ -8889,7 +9048,7 @@ pool_create_files() { for i in $(seq -w 1 $count) do local file=$tdir/spoo-$i - $SETSTRIPE -p $pool $file + $LFS setstripe -p $pool $file check_file_in_pool $file $pool "$tlist" || \ failed=$((failed + 1)) done @@ -8923,11 +9082,11 @@ pool_file_rel_path() { mkdir -p $tdir || { error_noexit "unable to create $tdir"; return 1 ; } local file="/..$tdir/$tfile-1" - $SETSTRIPE -p $pool $file || + $LFS setstripe -p $pool $file || { error_noexit "unable to create $file" ; return 2 ; } cd $tdir - $SETSTRIPE -p $pool $tfile-2 || { + $LFS setstripe -p $pool $tfile-2 || { error_noexit "unable to create $tfile-2 in $tdir" return 3 } @@ -8935,10 +9094,21 @@ pool_file_rel_path() { pool_remove_first_target() { echo "Removing first target from a pool" + pool_remove_target $1 -1 +} + +pool_remove_target() { local pool=$1 + local index=$2 local pname="lov.$FSNAME-*.pools.$pool" - local t=$($LCTL get_param -n $pname | head -1) + if [ $index -eq -1 ]; then + local t=$($LCTL get_param -n $pname | head -1) + else + local t=$(printf "$FSNAME-OST%04x_UUID" $index) + fi + + echo "Removing $t from $pool" do_facet mgs $LCTL pool_remove $FSNAME.$pool $t for mds_id in $(seq $MDSCOUNT); do local mdt_id=$((mds_id-1)) @@ -8986,7 +9156,7 @@ pool_remove_all_targets() { return 2 } # setstripe on an empty pool should fail - $SETSTRIPE -p $pool $file 2>/dev/null && { + $LFS setstripe -p $pool $file 2>/dev/null && { error_noexit "expected failure when creating file" \ "with empty pool" return 3 @@ -9009,7 +9179,7 @@ pool_remove() { return 1 } # setstripe on an empty pool should fail - $SETSTRIPE -p $pool $file 2>/dev/null && { + $LFS setstripe -p $pool $file 2>/dev/null && { error_noexit "expected failure when creating file" \ "with missing pool" return 2 @@ -9058,7 +9228,7 @@ check_obdidx() { [[ -z "$file" || -z "$expected" ]] && error "check_obdidx: invalid argument!" - obdidx=$(comma_list $($GETSTRIPE $file | grep -A $OSTCOUNT obdidx | + obdidx=$(comma_list $($LFS getstripe $file | grep -A $OSTCOUNT obdidx | grep -v obdidx | awk '{print $1}' | xargs)) [[ $obdidx = $expected ]] || @@ -9076,8 +9246,8 @@ check_start_ost_idx() { [[ -z "$file" || -z "$expected" ]] && error "check_start_ost_idx: invalid argument!" - start_ost_idx=$($GETSTRIPE $file | grep -A 1 obdidx | grep -v obdidx | - awk '{print $1}') + start_ost_idx=$($LFS getstripe $file | grep -A 1 obdidx | + grep -v obdidx | awk '{print $1}') [[ $start_ost_idx = $expected ]] || error "OST index of the first stripe on $file is" \ @@ -9327,14 +9497,17 @@ check_clients_evicted() { local rc=0 for osc in $oscs; do - ((rc++)) echo "Check state for $osc" local evicted=$(do_facet client $LCTL get_param osc.$osc.state | - tail -n 3 | awk -F"[ [,]" \ - '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }') + tail -n 5 | awk -F"[ ,]" \ + '/EVICTED/ { if (mx<$4) { mx=$4; } } END { print mx }') if (($? == 0)) && (($evicted > $before)); then echo "$osc is evicted at $evicted" - ((rc--)) + else + ((rc++)) + echo "$osc was not evicted after $before:" + do_facet client $LCTL get_param osc.$osc.state | + tail -n 8 fi done @@ -9391,8 +9564,8 @@ changelog_register() { error "$mdt: changelog_mask=+hsm failed: $?" local cl_user - cl_user=$(do_facet $facet \ - $LCTL --device $mdt changelog_register -n) || + cl_user=$(do_facet $facet $LCTL --device $mdt \ + changelog_register -n $@) || error "$mdt: register changelog user failed: $?" stack_trap "__changelog_deregister $facet $cl_user" EXIT @@ -9479,6 +9652,7 @@ changelog_clear() { # so reorder to get same order than in changelog_register() local cl_facets=$(echo "${!CL_USERS[@]}" | tr " " "\n" | sort | tr "\n" " ") + local cl_user for facet in $cl_facets; do for cl_user in ${CL_USERS[$facet]}; do @@ -9490,12 +9664,24 @@ changelog_clear() { } changelog_dump() { + local rc + for M in $(seq $MDSCOUNT); do local facet=mds$M local mdt="$(facet_svc $facet)" - - $LFS changelog $mdt | sed -e 's/^/'$mdt'./' + local output + local ret + + output=$($LFS changelog $mdt) + ret=$? + if [ $ret -ne 0 ]; then + rc=${rc:-$ret} + elif [ -n "$output" ]; then + echo "$output" | sed -e 's/^/'$mdt'./' + fi done + + return ${rc:-0} } changelog_extract_field() { @@ -9730,45 +9916,58 @@ verify_yaml_layout() { is_project_quota_supported() { $ENABLE_PROJECT_QUOTAS || return 1 - [ "$(facet_fstype $SINGLEMDS)" == "ldiskfs" ] && - [ $(lustre_version_code $SINGLEMDS) -gt \ - $(version_code 2.9.55) ] && - lfs --help | grep project >&/dev/null && - egrep -q "7." /etc/redhat-release && return 0 - if [ "$(facet_fstype $SINGLEMDS)" == "zfs" ]; then - [ $(lustre_version_code $SINGLEMDS) -le \ - $(version_code 2.10.53) ] && return 1 + [[ "$(facet_fstype $SINGLEMDS)" == "ldiskfs" && + $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.9.55) ]] && + do_facet mds1 lfs --help |& grep -q project && return 0 - do_facet mds1 $ZPOOL upgrade -v | - grep project_quota && return 0 - fi + [[ "$(facet_fstype $SINGLEMDS)" == "zfs" && + $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.10.53) ]] && + do_facet mds1 $ZPOOL get all | grep -q project_quota && return 0 return 1 } +# ZFS project quota enable/disable: +# This feature will become active as soon as it is enabled and will never +# return to being disabled. Each filesystem will be upgraded automatically +# when remounted or when [a] new file is created under that filesystem. The +# upgrade can also be triggered on filesystems via `zfs set version=current +# `. The upgrade process runs in the background and may take a +# while to complete for the filesystems containing a large number of files. enable_project_quota() { is_project_quota_supported || return 0 - [ "$(facet_fstype $SINGLEMDS)" != "ldiskfs" ] && return 0 + local zkeeper=${KEEP_ZPOOL} + stack_trap "KEEP_ZPOOL=$zkeeper" EXIT + KEEP_ZPOOL="true" stopall || error "failed to stopall (1)" - for num in $(seq $MDSCOUNT); do - do_facet mds$num $TUNE2FS -O project $(mdsdevname $num) || - error "tune2fs $(mdsdevname $num) failed" - done + local zfeat_en="feature@project_quota=enabled" + for facet in $(seq -f mds%g $MDSCOUNT) $(seq -f ost%g $OSTCOUNT); do + local facet_fstype=${facet:0:3}1_FSTYPE + local devname - for num in $(seq $OSTCOUNT); do - do_facet ost$num $TUNE2FS -O project $(ostdevname $num) || - error "tune2fs $(ostdevname $num) failed" + if [ "${!facet_fstype}" = "zfs" ]; then + devname=$(zpool_name ${facet}) + do_facet ${facet} $ZPOOL set "$zfeat_en" $devname || + error "$ZPOOL set $zfeat_en $devname" + else + [ ${facet:0:3} == "mds" ] && + devname=$(mdsdevname ${facet:3}) || + devname=$(ostdevname ${facet:3}) + do_facet ${facet} $TUNE2FS -O project $devname || + error "tune2fs $devname failed" + fi done + KEEP_ZPOOL="${zkeeper}" mount setupall } disable_project_quota() { is_project_quota_supported || return 0 - [ "$(facet_fstype $SINGLEMDS)" != "ldiskfs" ] && return 0 + [ "$mds1_FSTYPE" != "ldiskfs" ] && return 0 stopall || error "failed to stopall (1)" for num in $(seq $MDSCOUNT); do @@ -9831,11 +10030,16 @@ init_agt_vars() { export SINGLEAGT=${SINGLEAGT:-agt1} export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"} + export HSMTOOL_PID_FILE=${HSMTOOL_PID_FILE:-"/var/run/lhsmtool_posix.pid"} export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""} export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""} export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""} export HSMTOOL_TESTDIR - export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ") + export HSMTOOL_ARCHIVE_FORMAT=${HSMTOOL_ARCHIVE_FORMAT:-v2} + + if ! [[ $HSMTOOL =~ hsmtool ]]; then + echo "HSMTOOL = '$HSMTOOL' does not contain 'hsmtool', GLWT" >&2 + fi HSM_ARCHIVE_NUMBER=2 @@ -9869,44 +10073,26 @@ get_mdt_devices() { done } -search_copytools() { - local hosts=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $hosts "pgrep -x $HSMTOOL_BASE" +pkill_copytools() { + local hosts="$1" + local signal="$2" + + do_nodes "$hosts" "pkill --pidfile=$HSMTOOL_PID_FILE --signal=$signal hsmtool" } -kill_copytools() { - local hosts=${1:-$(facet_active_host $SINGLEAGT)} +copytool_continue() { + local agents=${1:-$(facet_active_host $SINGLEAGT)} - echo "Killing existing copytools on $hosts" - do_nodesv $hosts "killall -q $HSMTOOL_BASE" || true + pkill_copytools "$agents" CONT || return 0 + echo "Copytool is continued on $agents" } -wait_copytools() { +kill_copytools() { local hosts=${1:-$(facet_active_host $SINGLEAGT)} - local wait_timeout=200 - local wait_start=$SECONDS - local wait_end=$((wait_start + wait_timeout)) - local sleep_time=100000 # 0.1 second - - while ((SECONDS < wait_end)); do - if ! search_copytools $hosts; then - echo "copytools stopped in $((SECONDS - wait_start))s" - return 0 - fi - - echo "copytools still running on $hosts" - usleep $sleep_time - [ $sleep_time -lt 32000000 ] && # 3.2 seconds - sleep_time=$(bc <<< "$sleep_time * 2") - done - # try to dump Copytool's stack - do_nodesv $hosts "echo 1 >/proc/sys/kernel/sysrq ; " \ - "echo t >/proc/sysrq-trigger" - - echo "copytools failed to stop in ${wait_timeout}s" - - return 1 + echo "Killing existing copytools on $hosts" + pkill_copytools "$hosts" TERM || return 0 + copytool_continue "$hosts" } copytool_monitor_cleanup() { @@ -9941,28 +10127,29 @@ copytool_logfile() __lhsmtool_rebind() { - do_facet $facet $HSMTOOL -p "$hsm_root" --rebind "$@" "$mountpoint" + do_facet $facet $HSMTOOL "${hsmtool_options[@]}" --rebind "$@" "$mountpoint" } __lhsmtool_import() { mkdir -p "$(dirname "$2")" || error "cannot create directory '$(dirname "$2")'" - do_facet $facet $HSMTOOL -p "$hsm_root" --import "$@" "$mountpoint" + do_facet $facet $HSMTOOL "${hsmtool_options[@]}" --import "$@" "$mountpoint" } __lhsmtool_setup() { - local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root \"$hsm_root\"" + local host="$(facet_host "$facet")" + local cmd="$HSMTOOL ${hsmtool_options[@]} --daemon --pid-file=$HSMTOOL_PID_FILE" [ -n "$bandwidth" ] && cmd+=" --bandwidth $bandwidth" [ -n "$archive_id" ] && cmd+=" --archive $archive_id" - [ ${#misc_options[@]} -gt 0 ] && - cmd+=" $(IFS=" " echo "$@")" - cmd+=" \"$mountpoint\"" +# [ ${#misc_options[@]} -gt 0 ] && +# cmd+=" $(IFS=" " echo "$@")" + cmd+=" $@ \"$mountpoint\"" - echo "Starting copytool $facet on $(facet_host $facet)" - stack_trap "do_facet $facet libtool execute pkill -x '$HSMTOOL' || true" EXIT - do_facet $facet "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1" + echo "Starting copytool '$facet' on '$host' with cmdline '$cmd'" + stack_trap "pkill_copytools $host TERM || true" EXIT + do_node "$host" "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1" } hsm_root() { @@ -9988,18 +10175,33 @@ copytool() local action=$1 shift + # Use default values + local facet=$SINGLEAGT + local mountpoint="${MOUNT2:-$MOUNT}" + local hsm_root="${hsm_root:-$(hsm_root "$facet")}" + # Parse arguments local fail_on_error=true - local -a misc_options + local -a hsmtool_options=("--hsm-root=$hsm_root") + local -a action_options=() + + if [[ -n "$HSMTOOL_ARCHIVE_FORMAT" ]]; then + hsmtool_options+=("--archive-format=$HSMTOOL_ARCHIVE_FORMAT") + fi + + if [[ -n "$HSMTOOL_VERBOSE" ]]; then + hsmtool_options+=("$HSMTOOL_VERBOSE") + fi + while [ $# -gt 0 ]; do case "$1" in -f|--facet) shift - local facet="$1" + facet="$1" ;; -m|--mountpoint) shift - local mountpoint="$1" + mountpoint="$1" ;; -a|--archive-id) shift @@ -10007,7 +10209,7 @@ copytool() ;; -h|--hsm-root) shift - local hsm_root="$1" + hsm_root="$1" ;; -b|--bwlimit) shift @@ -10018,17 +10220,12 @@ copytool() ;; *) # Uncommon(/copytool dependent) option - misc_options+=("$1") + action_options+=("$1") ;; esac shift done - # Use default values if needed - local facet=${facet:-$SINGLEAGT} - local mountpoint="${mountpoint:-${MOUNT2:-$MOUNT}}" - local hsm_root="${hsm_root:-$(hsm_root "$facet")}" - stack_trap "do_facet $facet rm -rf '$hsm_root'" EXIT do_facet $facet mkdir -p "$hsm_root" || error "mkdir '$hsm_root' failed" @@ -10039,7 +10236,7 @@ copytool() ;; esac - __${copytool}_${action} "${misc_options[@]}" + __${copytool}_${action} "${action_options[@]}" if [ $? -ne 0 ]; then local error_msg @@ -10049,8 +10246,8 @@ copytool() error_msg="Failed to start copytool $facet on '$host'" ;; import) - local src="${misc_options[0]}" - local dest="${misc_options[1]}" + local src="${action_options[0]}" + local dest="${action_options[1]}" error_msg="Failed to import '$src' to '$dest'" ;; rebind) @@ -10123,20 +10320,15 @@ mdts_set_param() { return $rc } -wait_result() { - local facet=$1 - shift - wait_update --verbose $(facet_active_host $facet) "$@" -} - mdts_check_param() { local key="$1" local target="$2" local timeout="$3" local mdtno + for mdtno in $(seq 1 $MDSCOUNT); do local idx=$(($mdtno - 1)) - wait_result mds${mdtno} \ + wait_update_facet --verbose mds${mdtno} \ "$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \ $timeout || error "$key state is not '$target' on mds${mdtno}" @@ -10187,7 +10379,7 @@ wait_request_state() { local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions" cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d=" - wait_result $mds "$cmd" "$state" 200 || + wait_update_facet --verbose $mds "$cmd" "$state" 200 || error "request on $fid is not $state on $mds" } @@ -10273,8 +10465,8 @@ verify_comp_at_zero() { error "No component starting at zero(!)" } -#TODO: This version is a placeholder, to be replaced before final commit -SEL_VER="2.12.52" +# version after which Self-Extending Layouts are available +SEL_VER="2.12.55" sel_layout_sanity() { local file=$1 @@ -10285,3 +10477,260 @@ sel_layout_sanity() { check_component_count $file $comp_cnt } +statx_supported() { + $STATX --quiet --version + return $? +} + +# +# wrappers for createmany and unlinkmany +# to set debug=0 if number of creates is high enough +# this is to speedup testing +# +function createmany() { + local count=${!#} + + (( count > 100 )) && { + local saved_debug=$($LCTL get_param -n debug) + local list=$(comma_list $(all_nodes)) + + do_nodes $list $LCTL set_param -n debug=0 + } + $LUSTRE/tests/createmany $* + local rc=$? + (( count > 100 )) && + do_nodes $list "$LCTL set_param -n debug=\\\"$saved_debug\\\"" + return $rc +} + +function unlinkmany() { + local count=${!#} + + (( count > 100 )) && { + local saved_debug=$($LCTL get_param -n debug) + local list=$(comma_list $(all_nodes)) + + do_nodes $list $LCTL set_param -n debug=0 + } + $LUSTRE/tests/unlinkmany $* + local rc=$? + (( count > 100 )) && + do_nodes $list "$LCTL set_param -n debug=\\\"$saved_debug\\\"" + return $rc +} + +# Check if fallocate supported on OSTs, enable if unset, default mode=0 +# Optionally pass the OST fallocate mode (0=unwritten extents, 1=zero extents) +function check_set_fallocate() +{ + local new_mode="$1" + local osts=$(comma_list $(osts_nodes)) + local fa_mode="osd-ldiskfs.*.fallocate_zero_blocks" + local old_mode=$(do_facet ost1 $LCTL get_param -n $fa_mode 2>/dev/null| + head -n 1) + + [[ -n "$old_mode" ]] || { echo "fallocate not supported"; return 1; } + [[ -z "$new_mode" && "$old_mode" != "-1" ]] && + { echo "keep default fallocate mode: $old_mode"; return 0; } + [[ "$new_mode" && "$old_mode" == "$new_mode" ]] && + { echo "keep current fallocate mode: $old_mode"; return 0; } + + stack_trap "do_nodes $osts $LCTL set_param $fa_mode=$old_mode" + do_nodes $osts $LCTL set_param $fa_mode=${new_mode:-0} || + error "set $fa_mode=$new_mode" +} + +# Check if fallocate supported on OSTs, enable if unset, skip if unavailable +function check_set_fallocate_or_skip() +{ + [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" + check_set_fallocate || skip "need at least 2.13.57 for fallocate" +} + +function disable_opencache() +{ + local state=$($LCTL get_param -n "llite.*.opencache_threshold_count" | head -1) + + test -z "${saved_OPENCACHE_value}" && + export saved_OPENCACHE_value="$state" + + [[ "$state" = "off" ]] && return + + $LCTL set_param -n "llite.*.opencache_threshold_count"=off +} + +function set_opencache() +{ + local newvalue="$1" + local state=$($LCTL get_param -n "llite.*.opencache_threshold_count") + + [[ -n "$newvalue" ]] || return + + [[ -n "${saved_OPENCACHE_value}" ]] || + export saved_OPENCACHE_value="$state" + + $LCTL set_param -n "llite.*.opencache_threshold_count"=$newvalue +} + + + +function restore_opencache() +{ + [[ -z "${saved_OPENCACHE_value}" ]] || + $LCTL set_param -n "llite.*.opencache_threshold_count"=${saved_OPENCACHE_value} +} + +# LU-13417: XXX lots of tests assume the directory to be created under MDT0, +# created on MDT0, use this function to create directory on specific MDT +# explicitly, and set default LMV to create subdirs on the same MDT too. +mkdir_on_mdt() { + local mdt + local OPTIND=1 + + while getopts "i:" opt $*; do + case $opt in + i) mdt=$OPTARG;; + esac + done + + shift $((OPTIND - 1)) + + $LFS mkdir -i $mdt -c 1 $* + # setting default LMV in non-DNE system will cause sanity-quota 41 fail + ((MDSCOUNT < 2)) || $LFS setdirstripe -D -i $mdt -c 1 $* +} + +mkdir_on_mdt0() { + mkdir_on_mdt -i0 $* +} + +# Wait for nodemap synchronization +wait_nm_sync() { + local nodemap_name=$1 + local key=$2 + local value=$3 + local opt=$4 + local proc_param + local is_active=$(do_facet mgs $LCTL get_param -n nodemap.active) + local max_retries=20 + local is_sync + local out1="" + local out2 + local mgs_ip=$(host_nids_address $mgs_HOST $NETTYPE | cut -d' ' -f1) + local i + + if [ "$nodemap_name" == "active" ]; then + proc_param="active" + elif [ -z "$key" ]; then + proc_param=${nodemap_name} + else + proc_param="${nodemap_name}.${key}" + fi + if [ "$opt" == "inactive" ]; then + # check nm sync even if nodemap is not activated + is_active=1 + opt="" + fi + (( is_active == 0 )) && [ "$proc_param" != "active" ] && return + + if [ -z "$value" ]; then + out1=$(do_facet mgs $LCTL get_param $opt \ + nodemap.${proc_param} 2>/dev/null) + echo "On MGS ${mgs_ip}, ${proc_param} = $out1" + else + out1=$value; + fi + + # if servers run on the same node, it is impossible to tell if they get + # synced with the mgs, so just wait an arbitrary 10 seconds + if [ $(facet_active_host mgs) == $(facet_active_host mds) ] && + [ $(facet_active_host mgs) == $(facet_active_host ost1) ]; then + echo "waiting 10 secs for sync" + sleep 10 + return + fi + + # wait up to 10 seconds for other servers to sync with mgs + for i in $(seq 1 10); do + for node in $(all_server_nodes); do + local node_ip=$(host_nids_address $node $NETTYPE | + cut -d' ' -f1) + + is_sync=true + if [ -z "$value" ]; then + [ $node_ip == $mgs_ip ] && continue + fi + + out2=$(do_node $node_ip $LCTL get_param $opt \ + nodemap.$proc_param 2>/dev/null) + echo "On $node ${node_ip}, ${proc_param} = $out2" + [ "$out1" != "$out2" ] && is_sync=false && break + done + $is_sync && break + sleep 1 + done + if ! $is_sync; then + echo MGS + echo $out1 + echo OTHER - IP: $node_ip + echo $out2 + error "mgs and $nodemap_name ${key} mismatch, $i attempts" + fi + echo "waited $((i - 1)) seconds for sync" +} + +consume_precreations() { + local dir=$1 + local mfacet=$2 + local OSTIDX=$3 + local extra=${4:-2} + local OST=$(ostname_from_index $OSTIDX $dir) + + test_mkdir -p $dir/${OST} + $LFS setstripe -i $OSTIDX -c 1 ${dir}/${OST} + + # on the mdt's osc + local mdtosc_proc=$(get_mdtosc_proc_path $mfacet $OST) + local last_id=$(do_facet $mfacet $LCTL get_param -n \ + osp.$mdtosc_proc.prealloc_last_id) + local next_id=$(do_facet $mfacet $LCTL get_param -n \ + osp.$mdtosc_proc.prealloc_next_id) + echo "Creating to objid $last_id on ost $OST..." + createmany -o $dir/${OST}/f $next_id $((last_id - next_id + extra)) +} + +__exhaust_precreations() { + local OSTIDX=$1 + local FAILLOC=$2 + local FAILIDX=${3:-$OSTIDX} + local ofacet=ost$((OSTIDX + 1)) + + mkdir_on_mdt0 $DIR/$tdir + local mdtidx=$($LFS getstripe -m $DIR/$tdir) + local mfacet=mds$((mdtidx + 1)) + echo OSTIDX=$OSTIDX MDTIDX=$mdtidx + + local mdtosc_proc=$(get_mdtosc_proc_path $mfacet) + do_facet $mfacet $LCTL get_param osp.$mdtosc_proc.prealloc* + +#define OBD_FAIL_OST_ENOSPC 0x215 + do_facet $ofacet $LCTL set_param fail_val=$FAILIDX fail_loc=0x215 + + consume_precreations $DIR/$tdir $mfacet $OSTIDX + + do_facet $mfacet $LCTL get_param osp.$mdtosc_proc.prealloc* + do_facet $ofacet $LCTL set_param fail_loc=$FAILLOC +} + +exhaust_precreations() { + __exhaust_precreations $1 $2 $3 + sleep_maxage +} + +exhaust_all_precreations() { + local i + for (( i=0; i < OSTCOUNT; i++ )) ; do + __exhaust_precreations $i $1 -1 + done + sleep_maxage +}