X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Ftest-framework.sh;h=04f9599afd29affedb127ddd7008460240eb4e0e;hb=c03ef784aa9b899da4461c6dd30223121a248cf4;hp=0ff2a819892e32a8a65f5bd848fde0e6872392da;hpb=2c656415c3aa8c13ae86b01f6b93e711d6cd1328;p=fs%2Flustre-release.git diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 0ff2a81..04f9599 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -277,6 +277,8 @@ init_test_env() { [ ! -f "$SGPDDSURVEY" ] && export SGPDDSURVEY=$(which sgpdd-survey) export MCREATE=${MCREATE:-mcreate} export MULTIOP=${MULTIOP:-multiop} + export MMAP_CAT=${MMAP_CAT:-mmap_cat} + export STATX=${STATX:-statx} # Ubuntu, at least, has a truncate command in /usr/bin # so fully path our truncate command. export TRUNCATE=${TRUNCATE:-$LUSTRE/tests/truncate} @@ -291,12 +293,17 @@ init_test_env() { fi export RSYNC_RSH=${RSYNC_RSH:-rsh} + export LNETCTL=${LNETCTL:-"$LUSTRE/../lnet/utils/lnetctl"} + [ ! -f "$LNETCTL" ] && export LNETCTL=$(which lnetctl 2> /dev/null) export LCTL=${LCTL:-"$LUSTRE/utils/lctl"} [ ! -f "$LCTL" ] && export LCTL=$(which lctl) export LFS=${LFS:-"$LUSTRE/utils/lfs"} [ ! -f "$LFS" ] && export LFS=$(which lfs) + export KSOCKLND_CONFIG=${KSOCKLND_CONFIG:-"$LUSTRE/scripts/ksocklnd-config"} + [ ! -f "$KSOCKLND_CONFIG" ] && + export KSOCKLND_CONFIG=$(which ksocklnd-config 2> /dev/null) - export PERM_CMD=${PERM_CMD:-"$LCTL conf_param"} + export PERM_CMD=$(echo ${PERM_CMD:-"$LCTL conf_param"}) export L_GETIDENTITY=${L_GETIDENTITY:-"$LUSTRE/utils/l_getidentity"} if [ ! -f "$L_GETIDENTITY" ]; then @@ -424,11 +431,22 @@ init_test_env() { # Constants used in more than one test script export LOV_MAX_STRIPE_COUNT=2000 + export DELETE_OLD_POOLS=${DELETE_OLD_POOLS:-false} + export KEEP_POOLS=${KEEP_POOLS:-false} + export PARALLEL=${PARALLEL:-"no"} export MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines} . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} get_lustre_env + # use localrecov to enable recovery for local clients, LU-12722 + [[ $MDS1_VERSION -lt $(version_code 2.13.52) ]] || { + export MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o localrecov"} + export MGS_MOUNT_OPTS=${MGS_MOUNT_OPTS:-"-o localrecov"} + } + + [[ $OST1_VERSION -lt $(version_code 2.13.52) ]] || + export OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"-o localrecov"} } check_cpt_number() { @@ -448,10 +466,10 @@ check_cpt_number() { # Return a numeric version code based on a version string. The version # code is useful for comparison two version strings to see which is newer. 
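# A worked example, assuming the new four-field, 8-bits-per-field packing
# implemented below (the concrete version strings are illustrative only):
#   version_code 2.13.52    -> (2<<24)|(13<<16)|(52<<8)    = 34419712
#   version_code 1.8.6-wc3  -> (1<<24)|(8<<16)|(6<<8)|3    = 17303043
# which lets callers compare releases numerically, mirroring how this change
# gates features, e.g.:
#   [[ $MDS1_VERSION -ge $(version_code 2.13.52) ]] && echo "MDS supports -o localrecov"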
version_code() { - # split arguments like "1.8.6-wc3" into "1", "8", "6", "wc3" - eval set -- $(tr "[:punct:]" " " <<< $*) + # split arguments like "1.8.6-wc3" into "1", "8", "6", "3" + eval set -- $(tr "[:punct:][a-z]" " " <<< $*) - echo -n "$(((${1:-0} << 16) | (${2:-0} << 8) | ${3:-0}))" + echo -n $(((${1:-0}<<24) | (${2:-0}<<16) | (${3:-0}<<8) | (${4:-0}))) } export LINUX_VERSION=$(uname -r | sed -e "s/\([0-9]*\.[0-9]*\.[0-9]*\).*/\1/") @@ -488,14 +506,31 @@ export LINUX_VERSION_CODE=$(version_code ${LINUX_VERSION//\./ }) # output: prints version string to stdout in (up to 4) dotted-decimal values lustre_build_version() { local facet=${1:-client} - local ver=$(do_facet $facet "$LCTL get_param -n version 2>/dev/null || - $LCTL lustre_build_version 2>/dev/null || - $LCTL --version 2>/dev/null | cut -d' ' -f2") + local facet_version=${facet}_VERSION + + # if the global variable is already set, then use that + [ -n "${!facet_version}" ] && echo ${!facet_version} && return + + # this is the currently-running version of the kernel modules + local ver=$(do_facet $facet "$LCTL get_param -n version 2>/dev/null") + # we mostly test 2.10+ systems, only try others if the above fails + if [ -z "$ver" ]; then + ver=$(do_facet $facet "$LCTL lustre_build_version 2>/dev/null") + fi + if [ -z "$ver" ]; then + ver=$(do_facet $facet "$LCTL --version 2>/dev/null" | + cut -d' ' -f2) + fi local lver=$(egrep -i "lustre: |version: " <<<"$ver" | head -n 1) [ -n "$lver" ] && ver="$lver" - sed -e 's/[^:]*: //' -e 's/^v//' -e 's/[ -].*//' -e 's/_/./g' <<<$ver | - cut -d. -f1-4 + lver=$(sed -e 's/[^:]*: //' -e 's/^v//' -e 's/[ -].*//' <<<$ver | + tr _ . | cut -d. -f1-4) + + # save in global variable for the future + export $facet_version=$lver + + echo $lver } # Report the Lustre numeric build version code for the supplied facet. @@ -507,6 +542,17 @@ module_loaded () { /sbin/lsmod | grep -q "^\<$1\>" } +check_lfs_df_ret_val() { + # Ignore only EOPNOTSUPP (which is 95; Operation not supported) error + # returned by 'lfs df' for valid dentry but not a lustrefs. + # + # 'lfs df' historically always returned success(0) instead of + # EOPNOTSUPP. This function for compatibility reason, ignores and + # masquerades EOPNOTSUPP as success. + [[ $1 -eq 95 ]] && return 0 + return $1 +} + PRLFS=false lustre_insmod() { local module=$1 @@ -631,29 +677,20 @@ load_module() { fi } -load_modules_local() { - if [ -n "$MODPROBE" ]; then - # use modprobe - echo "Using modprobe to load modules" - return 0 - fi - - # Create special udev test rules on every node - if [ -f $LUSTRE/lustre/conf/99-lustre.rules ]; then { - sed -e 's|/usr/sbin/lctl|$LCTL|g' $LUSTRE/lustre/conf/99-lustre.rules > /etc/udev/rules.d/99-lustre-test.rules - } else { - echo "SUBSYSTEM==\"lustre\", ACTION==\"change\", ENV{PARAM}==\"?*\", RUN+=\"$LCTL set_param '\$env{PARAM}=\$env{SETTING}'\"" > /etc/udev/rules.d/99-lustre-test.rules - } fi - udevadm control --reload-rules - udevadm trigger +do_lnetctl() { + $LCTL mark "$LNETCTL $*" + echo "$LNETCTL $*" + $LNETCTL "$@" +} +load_lnet() { # For kmemleak-enabled kernels we need clear all past state # that obviously has nothing to do with this Lustre run # Disable automatic memory scanning to avoid perf hit. 
if [ -f /sys/kernel/debug/kmemleak ] ; then - echo scan=off > /sys/kernel/debug/kmemleak - echo scan > /sys/kernel/debug/kmemleak - echo clear > /sys/kernel/debug/kmemleak + echo scan=off > /sys/kernel/debug/kmemleak || true + echo scan > /sys/kernel/debug/kmemleak || true + echo clear > /sys/kernel/debug/kmemleak || true fi echo Loading modules from $LUSTRE @@ -678,6 +715,7 @@ load_modules_local() { # if there is more than 4 CPU cores, libcfs should create multiple CPU # partitions. So we just force libcfs to create 2 partitions for # system with 2 or 4 cores + local saved_opts="$MODOPTS_LIBCFS" if [ $ncpus -le 4 ] && [ $ncpus -gt 1 ]; then # force to enable multiple CPU partitions echo "Force libcfs to create 2 CPU partitions" @@ -691,30 +729,62 @@ load_modules_local() { # variable to remote nodes unset MODOPTS_LIBCFS - set_default_debug - load_module ../lnet/lnet/lnet + set_default_debug "neterror net nettrace malloc" + if [ "$1" = "config_on_load=1" ]; then + load_module ../lnet/lnet/lnet + else + load_module ../lnet/lnet/lnet "$@" + fi LNDPATH=${LNDPATH:-"../lnet/klnds"} if [ -z "$LNETLND" ]; then case $NETTYPE in - o2ib*) LNETLND="o2iblnd/ko2iblnd" ;; - tcp*) LNETLND="socklnd/ksocklnd" ;; - *) local lnd="${NETTYPE%%[0-9]}lnd" + o2ib*) LNETLND="o2iblnd/ko2iblnd" ;; + tcp*) LNETLND="socklnd/ksocklnd" ;; + *) local lnd="${NETTYPE%%[0-9]}lnd" [ -f "$LNDPATH/$lnd/k$lnd.ko" ] && LNETLND="$lnd/k$lnd" || LNETLND="socklnd/ksocklnd" esac fi load_module ../lnet/klnds/$LNETLND + + if [ "$1" = "config_on_load=1" ]; then + do_lnetctl lnet configure --all || + return $? + fi +} + +load_modules_local() { + if [ -n "$MODPROBE" ]; then + # use modprobe + echo "Using modprobe to load modules" + return 0 + fi + + # Create special udev test rules on every node + if [ -f $LUSTRE/lustre/conf/99-lustre.rules ]; then { + sed -e 's|/usr/sbin/lctl|$LCTL|g' $LUSTRE/lustre/conf/99-lustre.rules > /etc/udev/rules.d/99-lustre-test.rules + } else { + echo "SUBSYSTEM==\"lustre\", ACTION==\"change\", ENV{PARAM}==\"?*\", RUN+=\"$LCTL set_param '\$env{PARAM}=\$env{SETTING}'\"" > /etc/udev/rules.d/99-lustre-test.rules + } fi + udevadm control --reload-rules + udevadm trigger + + load_lnet + load_module obdclass/obdclass + if ! client_only; then + MODOPTS_PTLRPC=${MODOPTS_PTLRPC:-"lbug_on_grant_miscount=1"} + fi load_module ptlrpc/ptlrpc load_module ptlrpc/gss/ptlrpc_gss load_module fld/fld load_module fid/fid load_module lmv/lmv load_module osc/osc - load_module mdc/mdc load_module lov/lov + load_module mdc/mdc load_module mgc/mgc load_module obdecho/obdecho if ! client_only; then @@ -770,12 +840,24 @@ load_modules_local() { } load_modules () { + local facets + local facet + local failover load_modules_local # bug 19124 # load modules on remote nodes optionally # lustre-tests have to be installed on these nodes if $LOAD_MODULES_REMOTE; then local list=$(comma_list $(remote_nodes_list)) + + # include failover nodes in case they are not in the list yet + facets=$(get_facets) + for facet in ${facets//,/ }; do + failover=$(facet_failover_host $facet) + [ -n "$list" ] && [[ ! 
"$list" =~ "$failover" ]] && + list="$list,$failover" + done + if [ -n "$list" ]; then echo "loading modules on: '$list'" do_rpc_nodes "$list" load_modules_local @@ -784,21 +866,19 @@ load_modules () { } check_mem_leak () { - LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true) - LEAK_PORTALS=$(dmesg | tail -n 20 | grep "Portals memory leaked" || true) - if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then - echo "$LEAK_LUSTRE" 1>&2 - echo "$LEAK_PORTALS" 1>&2 - mv $TMP/debug $TMP/debug-leak.`date +%s` || true - echo "Memory leaks detected" - [ -n "$IGNORE_LEAK" ] && { echo "ignoring leaks" && return 0; } || true - return 1 - fi + LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true) + LEAK_PORTALS=$(dmesg | tail -n 20 | egrep -i "libcfs.*memory leaked" || true) + if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then + echo "$LEAK_LUSTRE" 1>&2 + echo "$LEAK_PORTALS" 1>&2 + mv $TMP/debug $TMP/debug-leak.`date +%s` || true + echo "Memory leaks detected" + [ -n "$IGNORE_LEAK" ] && { echo "ignoring leaks" && return 0; } || true + return 1 + fi } -unload_modules() { - wait_exit_ST client # bug 12845 - +unload_modules_local() { $LUSTRE_RMMOD ldiskfs || return 2 [ -f /etc/udev/rules.d/99-lustre-test.rules ] && @@ -806,15 +886,23 @@ unload_modules() { udevadm control --reload-rules udevadm trigger + check_mem_leak || return 254 + + return 0 +} + +unload_modules() { + local rc=0 + + wait_exit_ST client # bug 12845 + + unload_modules_local || rc=$? + if $LOAD_MODULES_REMOTE; then local list=$(comma_list $(remote_nodes_list)) if [ -n "$list" ]; then echo "unloading modules on: '$list'" - do_rpc_nodes "$list" $LUSTRE_RMMOD ldiskfs - do_rpc_nodes "$list" check_mem_leak - do_rpc_nodes "$list" "rm -f /etc/udev/rules.d/99-lustre-test.rules" - do_rpc_nodes "$list" "udevadm control --reload-rules" - do_rpc_nodes "$list" "udevadm trigger" + do_rpc_nodes "$list" unload_modules_local fi fi @@ -825,10 +913,9 @@ unload_modules() { rm -f $sbin_mount fi - check_mem_leak || return 254 + [[ $rc -eq 0 ]] && echo "modules unloaded." - echo "modules unloaded." - return 0 + return $rc } fs_log_size() { @@ -836,7 +923,7 @@ fs_log_size() { local size=0 case $(facet_fstype $facet) in - ldiskfs) size=50;; # largest seen is 44, leave some headroom + ldiskfs) size=72;; # largest seen is 64, leave some headroom # grant_block_size is in bytes, allow at least 2x max blocksize zfs) size=$(lctl get_param osc.$FSNAME*.import | awk '/grant_block_size:/ {print $2/512; exit;}') @@ -972,6 +1059,22 @@ add_sk_mntflag() { echo -n $mt_opts } +from_build_tree() { + local from_tree + + case $LUSTRE in + /usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \ + /usr/lib64/lustre ) + from_tree=false + ;; + *) + from_tree=true + ;; + esac + + [ $from_tree = true ] +} + init_gss() { if $SHARED_KEY; then GSS=true @@ -982,16 +1085,6 @@ init_gss() { return fi - case $LUSTRE in - /usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \ - /usr/lib64/lustre ) - from_build_tree=false - ;; - *) - from_build_tree=true - ;; - esac - if ! module_loaded ptlrpc_gss; then load_module ptlrpc/gss/ptlrpc_gss module_loaded ptlrpc_gss || @@ -1002,6 +1095,16 @@ init_gss() { start_gss_daemons || error_exit "start gss daemon failed! rc=$?" fi + if $GSS_SK && ! 
$SK_NO_KEY; then + echo "Loading basic SSK keys on all servers" + do_nodes $(comma_list $(all_server_nodes)) \ + "lgss_sk -t server -l $SK_PATH/$FSNAME.key || true" + do_nodes $(comma_list $(all_server_nodes)) \ + "keyctl show | grep lustre | cut -c1-11 | + sed -e 's/ //g;' | + xargs -IX keyctl setperm X 0x3f3f3f3f" + fi + if $GSS_SK && $SK_NO_KEY; then local numclients=${1:-$CLIENTCOUNT} local clients=${CLIENTS:-$HOSTNAME} @@ -1010,7 +1113,7 @@ init_gss() { SK_NO_KEY=false local lgssc_conf_file="/etc/request-key.d/lgssc.conf" - if $from_build_tree; then + if from_build_tree; then mkdir -p $SK_OM_PATH if grep -q request-key /proc/mounts > /dev/null; then echo "SSK: Request key already mounted." @@ -1029,7 +1132,7 @@ init_gss() { cat $lgssc_conf_file if ! local_mode; then - if $from_build_tree; then + if from_build_tree; then do_nodes $(comma_list $(all_nodes)) "mkdir -p \ $SK_OM_PATH" do_nodes $(comma_list $(all_nodes)) "mount \ @@ -1115,7 +1218,7 @@ init_gss() { fi if [ -n "$LGSS_KEYRING_DEBUG" ] && \ - ( local_mode || $from_build_tree ); then + ( local_mode || from_build_tree ); then lctl set_param -n \ sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG elif [ -n "$LGSS_KEYRING_DEBUG" ]; then @@ -1134,16 +1237,6 @@ cleanup_gss() { cleanup_sk() { if $GSS_SK; then - case $LUSTRE in - /usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \ - /usr/lib64/lustre ) - from_build_tree=false - ;; - *) - from_build_tree=true - ;; - esac - if $SK_S2S; then do_node $(mgs_node) "$LCTL nodemap_del $SK_S2SNM" do_node $(mgs_node) "$LCTL nodemap_del $SK_S2SNMCLI" @@ -1156,7 +1249,7 @@ cleanup_sk() { $SK_PATH/$FSNAME*.key $SK_PATH/nodemap/$FSNAME*.key" do_nodes $(comma_list $(all_nodes)) "keyctl show | \ awk '/lustre/ { print \\\$1 }' | xargs -IX keyctl unlink X" - if $from_build_tree; then + if from_build_tree; then # Remove the mount and clean up the files we added to # SK_PATH do_nodes $(comma_list $(all_nodes)) "while grep -q \ @@ -1280,20 +1373,6 @@ devicelabel() { echo -n $label } -mdsdevlabel() { - local num=$1 - local device=$(mdsdevname $num) - local label=$(devicelabel mds$num ${device} | grep -v "CMD: ") - echo -n $label -} - -ostdevlabel() { - local num=$1 - local device=$(ostdevname $num) - local label=$(devicelabel ost$num ${device} | grep -v "CMD: ") - echo -n $label -} - # # Get the device of a facet. 
# @@ -1526,8 +1605,7 @@ get_osd_param() { local device=${2:-$FSNAME-OST*} local name=$3 - do_nodes $nodes "$LCTL get_param -n obdfilter.$device.$name \ - osd-*.$device.$name 2>&1" | grep -v 'error:' + do_nodes $nodes "$LCTL get_param -n osd-*.$device.$name" } set_osd_param() { @@ -1536,8 +1614,7 @@ set_osd_param() { local name=$3 local value=$4 - do_nodes $nodes "$LCTL set_param -n obdfilter.$device.$name=$value \ - osd-*.$device.$name=$value 2>&1" | grep -v 'error:' + do_nodes $nodes "$LCTL set_param -n osd-*.$device.$name=$value" } set_debug_size () { @@ -1596,6 +1673,22 @@ set_default_debug_facet () { set_default_debug_nodes $node "$debug" "$subsys" $debug_size } +set_params_nodes () { + [[ $# -ge 2 ]] || return 0 + + local nodes=$1 + shift + do_nodes $nodes $LCTL set_param $@ +} + +set_params_clients () { + local clients=${1:-$CLIENTS} + local params=${2:-$CLIENT_LCTL_SETPARAM_PARAM} + + [[ -n $params ]] || return 0 + set_params_nodes $clients $params +} + set_hostid () { local hostid=${1:-$(hostid)} @@ -1609,9 +1702,17 @@ set_hostid () { mount_facets () { local facets=${1:-$(get_facets)} local facet + local -a mountpids + local total=0 + local ret=0 for facet in ${facets//,/ }; do - mount_facet $facet + mount_facet $facet & + mountpids[total]=$! + total=$((total+1)) + done + for ((index=0; index<$total; index++)); do + wait ${mountpids[index]} local RC=$? [ $RC -eq 0 ] && continue @@ -1621,8 +1722,9 @@ mount_facets () { else error "Restart of $facet failed!" fi - return $RC + ret=$RC done + return $ret } # @@ -1990,6 +2092,9 @@ mount_facet() { local devicelabel local dm_dev=${!dev} + [[ $dev == "mgsfailover_dev" ]] && combined_mgs_mds && + dev=mds1failover_dev + module_loaded lustre || load_modules case $fstype in @@ -2082,11 +2187,17 @@ start() { eval export ${dev_alias}_dev=${device} eval export ${facet}_opt=\"$@\" + combined_mgs_mds && [[ ${dev_alias} == mds1 ]] && + eval export mgs_dev=${device} + local varname=${dev_alias}failover_dev if [ -n "${!varname}" ] ; then eval export ${dev_alias}failover_dev=${!varname} else eval export ${dev_alias}failover_dev=$device + combined_mgs_mds && [[ ${dev_alias} == mds1 ]] && + eval export mgsfailover_dev=${device} + fi local mntpt=$(facet_mntpt $facet) @@ -2149,7 +2260,7 @@ restore_quota() { if [ "$old_MDT_QUOTA_TYPE" ]; then if [[ $PERM_CMD == *"set_param -P"* ]]; then do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-MDT*.quota_slave.enable = \ + osd-*.$FSNAME-MDT*.quota_slave.enabled = \ $old_MDT_QUOTA_TYPE else do_facet mgs $PERM_CMD \ @@ -2159,7 +2270,7 @@ restore_quota() { if [ "$old_OST_QUOTA_TYPE" ]; then if [[ $PERM_CMD == *"set_param -P"* ]]; then do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-OST*.quota_slave.enable = \ + osd-*.$FSNAME-OST*.quota_slave.enabled = \ $old_OST_QUOTA_TYPE else do_facet mgs $LCTL conf_param \ @@ -2173,6 +2284,7 @@ restore_quota() { # This will allow fixing the "lfs df" summary line in the future. lfs_df() { $LFS df $* | sed -e 's/filesystem /filesystem_/' + check_lfs_df_ret_val $? 
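# A minimal sketch of the intended effect of the wrapping above (the
# non-Lustre path is an arbitrary example, not taken from this change):
#   $LFS df /etc; echo $?        # may exit 95 (EOPNOTSUPP) for a non-Lustre dentry
#   check_lfs_df_ret_val 95      # returns 0 -- EOPNOTSUPP is masked
#   check_lfs_df_ret_val 1       # returns 1 -- other errors still propagate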
} # Get free inodes on the MDT specified by mdt index, free indoes on @@ -2198,10 +2310,11 @@ mdt_free_inodes() { ost_dev_status() { local ost_idx=$1 local mnt_pnt=${2:-$MOUNT} + local opts=$3 local ost_uuid ost_uuid=$(ostuuid_from_index $ost_idx $mnt_pnt) - lfs_df $mnt_pnt | awk '/'$ost_uuid'/ { print $7 }' + lfs_df $opts $mnt_pnt | awk '/'$ost_uuid'/ { print $7 }' } setup_quota(){ @@ -2219,9 +2332,9 @@ setup_quota(){ if [[ $PERM_CMD == *"set_param -P"* ]]; then do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-MDT*.quota_slave.enable=$QUOTA_TYPE + osd-*.$FSNAME-MDT*.quota_slave.enabled=$QUOTA_TYPE do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-OST*.quota_slave.enable=$QUOTA_TYPE + osd-*.$FSNAME-OST*.quota_slave.enabled=$QUOTA_TYPE else do_facet mgs $PERM_CMD $FSNAME.quota.mdt=$QUOTA_TYPE || error "set mdt quota type failed" @@ -2307,6 +2420,7 @@ zconf_mount() { fi set_default_debug_nodes $client + set_params_clients $client return 0 } @@ -2341,18 +2455,17 @@ zconf_umount() { # Mount the file system on the MDS mount_mds_client() { - local mds_HOST=${SINGLEMDS}_HOST - echo $mds_HOST - do_facet $SINGLEMDS "mkdir -p $MOUNT2" - zconf_mount $mds1_HOST $MOUNT2 $MOUNT_OPTS || - error "unable to mount $MOUNT2 on MDS" + local host=$(facet_active_host $SINGLEMDS) + echo $host + zconf_mount $host $MOUNT2 $MOUNT_OPTS || + error "unable to mount $MOUNT2 on $host" } # Unmount the file system on the MDS umount_mds_client() { - local mds_HOST=${SINGLEMDS}_HOST - zconf_umount $mds1_HOST $MOUNT2 - do_facet $SINGLEMDS "rm -rf $MOUNT2" + local host=$(facet_active_host $SINGLEMDS) + zconf_umount $host $MOUNT2 + do_facet $SINGLEMDS "rmdir $MOUNT2" } # nodes is comma list @@ -2534,6 +2647,7 @@ exit \\\$rc" || return ${PIPESTATUS[0]} do_nodes $clients "mount | grep $mnt' '" set_default_debug_nodes $clients + set_params_clients $clients return 0 } @@ -2667,19 +2781,27 @@ remount_facet() { reboot_facet() { local facet=$1 + local node=$(facet_active_host $facet) + local sleep_time=${2:-10} + if [ "$FAILURE_MODE" = HARD ]; then - reboot_node $(facet_active_host $facet) + boot_node $node else - sleep 10 + sleep $sleep_time fi } boot_node() { - local node=$1 - if [ "$FAILURE_MODE" = HARD ]; then - reboot_node $node - wait_for_host $node - fi + local node=$1 + + if [ "$FAILURE_MODE" = HARD ]; then + reboot_node $node + wait_for_host $node + if $LOAD_MODULES_REMOTE; then + echo "loading modules on $node: $facet" + do_rpc_nodes $node load_modules_local + fi + fi } facets_hosts () { @@ -2933,83 +3055,118 @@ stop_client_loads() { } # End recovery-scale functions -# verify that lustre actually cleaned up properly -cleanup_check() { - VAR=$(lctl get_param -n catastrophe 2>&1) - if [ $? = 0 ] ; then - if [ $VAR != 0 ]; then - error "LBUG/LASSERT detected" - fi - fi - BUSY=$(dmesg | grep -i destruct || true) - if [ -n "$BUSY" ]; then - echo "$BUSY" 1>&2 - [ -e $TMP/debug ] && mv $TMP/debug $TMP/debug-busy.$(date +%s) - exit 205 - fi - - check_mem_leak || exit 204 - - [[ $($LCTL dl 2>/dev/null | wc -l) -gt 0 ]] && $LCTL dl && - echo "$TESTSUITE: lustre didn't clean up..." 1>&2 && - return 202 || true - - if module_loaded lnet || module_loaded libcfs; then - echo "$TESTSUITE: modules still loaded..." 1>&2 - /sbin/lsmod 1>&2 - return 203 - fi - return 0 -} +## +# wait for a command to return the expected result +# +# This will run @check on @node repeatedly until the output matches @expect +# based on the supplied condition, or until @max_wait seconds have elapsed, +# whichever comes first. 
@cond may be one of the normal bash operators, +# "-gt", "-ge", "-eq", "-le", "-lt", "==", "!=", or "=~", and must be quoted +# in the caller to avoid unintentional evaluation by the shell in the caller. +# +# If @max_wait is not specified, the condition will be checked for up to 90s. +# +# If --verbose is passed as the first argument, the result is printed on each +# value change, otherwise it is only printed after every 10s interval. +# +# If --quiet is passed as the first/second argument, the do_node() command +# will not print the remote command before executing it each time. +# +# Using wait_update_cond() or related helper function is preferable to adding +# a "long enough" wait for some state to change in the background, since +# "long enough" may be too short due to tunables, system config, or running in +# a VM, and must by necessity wait too long for most cases or risk failure. +# +# usage: wait_update_cond [--verbose] [--quiet] node check cond expect [max_wait] +wait_update_cond() { + local verbose + local quiet -wait_update () { - local verbose=false - if [[ "$1" == "--verbose" ]]; then - shift - verbose=true - fi + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift local node=$1 - local TEST=$2 - local FINAL=$3 - local MAX=${4:-90} - local RESULT - local PREV_RESULT - local WAIT=0 + local check="$2" + local cond="$3" + local expect="$4" + local max_wait=${5:-90} + local result + local prev_result + local waited=0 + local begin=$SECONDS local sleep=1 local print=10 - PREV_RESULT=$(do_node $node "$TEST") - while [ true ]; do - RESULT=$(do_node $node "$TEST") - if [[ "$RESULT" == "$FINAL" ]]; then - [[ -z "$RESULT" || $WAIT -le $sleep ]] || - echo "Updated after ${WAIT}s: wanted '$FINAL'"\ - "got '$RESULT'" + while (( $waited <= $max_wait )); do + result=$(do_node $quiet $node "$check") + + eval [[ "'$result'" $cond "'$expect'" ]] + if [[ $? 
== 0 ]]; then + [[ -z "$result" || $waited -le $sleep ]] || + echo "Updated after ${waited}s: want '$expect' got '$result'" return 0 fi - if [[ $verbose && "$RESULT" != "$PREV_RESULT" ]]; then - echo "Changed after ${WAIT}s: from '$PREV_RESULT'"\ - "to '$RESULT'" - PREV_RESULT=$RESULT + if [[ -n "$verbose" && "$result" != "$prev_result" ]]; then + [[ -n "$prev_result" ]] && + echo "Changed after ${waited}s: from '$prev_result' to '$result'" + prev_result="$result" fi - [[ $WAIT -ge $MAX ]] && break - [[ $((WAIT % print)) -eq 0 ]] && - echo "Waiting $((MAX - WAIT)) secs for update" - WAIT=$((WAIT + sleep)) + (( $waited % $print == 0 )) && + echo "Waiting $((max_wait - waited))s for '$expect'" sleep $sleep + waited=$((SECONDS - begin)) done - echo "Update not seen after ${MAX}s: wanted '$FINAL' got '$RESULT'" + echo "Update not seen after ${max_wait}s: want '$expect' got '$result'" return 3 } +# usage: wait_update [--verbose] [--quiet] node check expect [max_wait] +wait_update() { + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + + local node="$1" + local check="$2" + local expect="$3" + local max_wait=$4 + + wait_update_cond $verbose $quiet $node "$check" "==" "$expect" $max_wait +} + +# usage: wait_update_facet_cond [--verbose] facet check cond expect [max_wait] +wait_update_facet_cond() { + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + + local node=$(facet_active_host $1) + local check="$2" + local cond="$3" + local expect="$4" + local max_wait=$5 + + wait_update_cond $verbose $quiet $node "$check" "$cond" "$expect" $max_wait +} + +# usage: wait_update_facet [--verbose] facet check expect [max_wait] wait_update_facet() { - local verbose= - [ "$1" = "--verbose" ] && verbose="$1" && shift + local verbose + local quiet - local facet=$1 - shift - wait_update $verbose $(facet_active_host $facet) "$@" + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + + local node=$(facet_active_host $1) + local check="$2" + local expect="$3" + local max_wait=$4 + + wait_update_cond $verbose $quiet $node "$check" "==" "$expect" $max_wait } sync_all_data() { @@ -3221,13 +3378,13 @@ wait_delete_completed_mds() { } wait_for_host() { - local hostlist=$1 + local hostlist=$1 - # we can use "for" here because we are waiting the slowest - for host in ${hostlist//,/ }; do - check_network "$host" 900 - done - while ! do_nodes $hostlist hostname > /dev/null; do sleep 5; done + # we can use "for" here because we are waiting the slowest + for host in ${hostlist//,/ }; do + check_network "$host" 900 + done + while ! do_nodes $hostlist hostname > /dev/null; do sleep 5; done } wait_for_facet() { @@ -3273,8 +3430,11 @@ wait_recovery_complete () { fi echo affected facets: $facets - # we can use "for" here because we are waiting the slowest - for facet in ${facets//,/ }; do + facets=${facets//,/ } + # We can use "for" here because we are waiting the slowest. + # The mgs not having the recovery_status proc entry, exclude it + # from the facet list. 
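# A usage sketch for the wait_update_cond()/wait_update_facet_cond() helpers
# defined above (the parameter names and threshold values are illustrative
# assumptions, not taken from this change):
#   wait_update_facet_cond --verbose ost1 \
#       "$LCTL get_param -n obdfilter.$FSNAME-OST0000.job_cleanup_interval" \
#       "-le" 600 30
#   wait_update --quiet $HOSTNAME "grep -c lustre /proc/mounts" 1 90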
+ for facet in ${facets//mgs/ }; do local var_svc=${facet}_svc local param="*.${!var_svc}.recovery_status" @@ -3345,14 +3505,15 @@ wait_osts_up() { } wait_destroy_complete () { - echo "Waiting for local destroys to complete" + echo "Waiting for MDT destroys to complete" # MAX value shouldn't be big as this mean server responsiveness # never increase this just to make test pass but investigate # why it takes so long time - local MAX=5 + local MAX=${1:-5} local WAIT=0 + local list=$(comma_list $(mdts_nodes)) while [ $WAIT -lt $MAX ]; do - local -a RPCs=($($LCTL get_param -n osc.*.destroys_in_flight)) + local -a RPCs=($(do_nodes $list $LCTL get_param -n osp.*.destroys_in_flight)) local con=1 local i @@ -3367,7 +3528,7 @@ wait_destroy_complete () { echo "Waiting ${WAIT}s for local destroys to complete" WAIT=$((WAIT + 1)) done - echo "Local destroys weren't done in $MAX sec." + echo "MDT destroys weren't done in $MAX sec." return 1 } @@ -3432,12 +3593,15 @@ wait_remote_prog () { lfs_df_check() { local clients=${1:-$CLIENTS} + local rc=0 if [ -z "$clients" ]; then - $LFS df $MOUNT + $LFS df $MOUNT > /dev/null || rc=$? else - $PDSH $clients "$LFS df $MOUNT" > /dev/null + $PDSH $clients "$LFS df $MOUNT" > /dev/null || rc=$? fi + + check_lfs_df_ret_val $rc } clients_up() { @@ -3446,6 +3610,21 @@ clients_up() { lfs_df_check } +all_mds_up() { + (( MDSCOUNT == 1 )) && return + + # wait so that statfs data on MDT expire + local delay=$(do_facet mds1 $LCTL \ + get_param -n osp.*MDT*MDT0000.maxage | sort -n | tail -1) + + [ -n "$delay" ] || error "fail to get maxage" + sleep $delay + local nodes=$(comma_list $(mdts_nodes)) + # initiate statfs RPC, all to all MDTs + do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null + do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null +} + client_up() { # usually checked on particular client or locally sleep 1 @@ -3534,11 +3713,13 @@ facet_failover() { $E2FSCK_ON_MDT0 && (run_e2fsck $(facet_active_host $SINGLEMDS) \ $(mdsdevname 1) "-n" || error "Running e2fsck") + local -a mountpids + for ((index=0; index<$total; index++)); do facet=$(echo ${affecteds[index]} | tr -s " " | cut -d"," -f 1) echo reboot facets: ${affecteds[index]} - reboot_facet $facet + reboot_facet $facet $sleep_time change_active ${affecteds[index]} @@ -3551,11 +3732,20 @@ facet_failover() { if ! combined_mgs_mds && list_member ${affecteds[index]} mgs; then mount_facet mgs || error "Restart of mgs failed" + affecteds[index]=$(exclude_items_from_list \ + ${affecteds[index]} mgs) fi - # FIXME; has to be changed to mount all facets concurrently - affected=$(exclude_items_from_list ${affecteds[index]} mgs) - echo mount facets: ${affecteds[index]} - mount_facets ${affecteds[index]} + if [ -n "${affecteds[index]}" ]; then + echo mount facets: ${affecteds[index]} + mount_facets ${affecteds[index]} & + mountpids[index]=$! 
+ fi + done + for ((index=0; index<$total; index++)); do + if [ -n "${affecteds[index]}" ]; then + wait ${mountpids[index]} + fi + if $GSS_SK; then do_nodes $(comma_list $(all_nodes)) \ "keyctl show | grep lustre | cut -c1-11 | @@ -3565,10 +3755,6 @@ facet_failover() { done } -obd_name() { - local facet=$1 -} - replay_barrier() { local facet=$1 do_facet $facet "sync; sync; sync" @@ -3668,25 +3854,29 @@ fail() { } fail_nodf() { - local facet=$1 - facet_failover $facet + local facet=$1 + + facet_failover $facet } fail_abort() { local facet=$1 + local abort_type=${2:-"abort_recovery"} + stop $facet change_active $facet wait_for_facet $facet - mount_facet $facet -o abort_recovery + mount_facet $facet -o $abort_type clients_up || echo "first stat failed: $?" clients_up || error "post-failover stat: $?" + all_mds_up } host_nids_address() { local nodes=$1 local net=${2:-"."} - do_nodes $nodes "$LCTL list_nids | grep $net | cut -f 1 -d @" + do_nodes $nodes "$LCTL list_nids | grep -w $net | cut -f 1 -d @" } h2name_or_ip() { @@ -3839,7 +4029,14 @@ facet_host() { elif [ "${facet:0:3}" == "mdt" -o \ "${facet:0:3}" == "mds" -o \ "${facet:0:3}" == "mgs" ]; then - eval export ${facet}_HOST=${mds_HOST} + local temp + if [ "${facet}" == "mgsfailover" ] && + [ -n "$mds1failover_HOST" ]; then + temp=$mds1failover_HOST + else + temp=${mds_HOST} + fi + eval export ${facet}_HOST=$temp fi fi echo -n ${!varname} @@ -3855,6 +4052,13 @@ facet_failover_host() { return fi + if combined_mgs_mds && [ $facet == "mgs" ] && + [ -z $mds1failover_HOST ]; then + temp=mds1failover_HOST + echo ${!temp} + return + fi + if [ "${facet:0:3}" == "mdt" -o "${facet:0:3}" == "mds" -o \ "${facet:0:3}" == "mgs" ]; then @@ -3912,8 +4116,6 @@ change_active() { local facetlist=$1 local facet - facetlist=$(exclude_items_from_list $facetlist mgs) - for facet in ${facetlist//,/ }; do local failover=${facet}failover local host=`facet_host $failover` @@ -3936,26 +4138,27 @@ change_active() { } do_node() { - local verbose=false - # do not stripe off hostname if verbose, bug 19215 - if [ x$1 = x--verbose ]; then - shift - verbose=true - fi + local verbose + local quiet - local HOST=$1 - shift - local myPDSH=$PDSH - if [ "$HOST" = "$HOSTNAME" ]; then - myPDSH="no_dsh" - elif [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" ]; then - echo "cannot run remote command on $HOST with $myPDSH" - return 128 - fi - if $VERBOSE; then - echo "CMD: $HOST $@" >&2 - $myPDSH $HOST "$LCTL mark \"$@\"" > /dev/null 2>&1 || : - fi + # do not strip off hostname if verbose, b=19215 + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + + local HOST=$1 + shift + local myPDSH=$PDSH + + if [ "$HOST" = "$HOSTNAME" ]; then + myPDSH="no_dsh" + elif [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" ]; then + echo "cannot run remote command on $HOST with $myPDSH" + return 128 + fi + if $VERBOSE && [[ -z "$quiet" ]]; then + echo "CMD: $HOST $@" >&2 + $myPDSH $HOST "$LCTL mark \"$@\"" > /dev/null 2>&1 || : + fi if [[ "$myPDSH" == "rsh" ]] || [[ "$myPDSH" == *pdsh* && "$myPDSH" != *-S* ]]; then @@ -3966,31 +4169,35 @@ do_node() { eval $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests; PATH=\$PATH:/sbin:/usr/sbin; cd $RPWD; - LUSTRE=\"$RLUSTRE\" sh -c \"$@\") || + LUSTRE=\"$RLUSTRE\" bash -c \"$@\") || echo command failed >$command_status" [[ -n "$($myPDSH $HOST cat $command_status)" ]] && return 1 || return 0 fi - if $verbose ; then - # print HOSTNAME for myPDSH="no_dsh" - if [[ $myPDSH = no_dsh ]]; then - $myPDSH $HOST 
"(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" | sed -e "s/^/${HOSTNAME}: /" - else - $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" - fi - else - $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" | sed "s/^${HOST}: //" - fi - return ${PIPESTATUS[0]} -} - -do_nodev() { - do_node --verbose "$@" + if [[ -n "$verbose" ]]; then + # print HOSTNAME for myPDSH="no_dsh" + if [[ $myPDSH = no_dsh ]]; then + $myPDSH $HOST \ + "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\ + cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$@\")" | + sed -e "s/^/${HOSTNAME}: /" + else + $myPDSH $HOST \ + "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\ + cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$@\")" + fi + else + $myPDSH $HOST \ + "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\ + cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$@\")" | + sed "s/^${HOST}: //" + fi + return ${PIPESTATUS[0]} } single_local_node () { - [ "$1" = "$HOSTNAME" ] + [ "$1" = "$HOSTNAME" ] } # Outputs environment variable assignments that should be passed to remote nodes @@ -4036,45 +4243,42 @@ get_env_vars() { } do_nodes() { - local verbose=false - # do not stripe off hostname if verbose, bug 19215 - if [ x$1 = x--verbose ]; then - shift - verbose=true - fi + local verbose + local quiet - local rnodes=$1 - shift + # do not strip off hostname if verbose, b=19215 + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift - if single_local_node $rnodes; then - if $verbose; then - do_nodev $rnodes "$@" - else - do_node $rnodes "$@" - fi - return $? - fi + local rnodes=$1 + shift + + if single_local_node $rnodes; then + do_node $verbose $quiet $rnodes "$@" + return $? + fi - # This is part from do_node - local myPDSH=$PDSH + # This is part from do_node + local myPDSH=$PDSH - [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" -o "$myPDSH" = "rsh" ] && \ - echo "cannot run remote command on $rnodes with $myPDSH" && return 128 + [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" -o "$myPDSH" = "rsh" ] && + echo "cannot run remote command on $rnodes with $myPDSH" && + return 128 - export FANOUT=$(get_node_count "${rnodes//,/ }") - if $VERBOSE; then - echo "CMD: $rnodes $@" >&2 - $myPDSH $rnodes "$LCTL mark \"$@\"" > /dev/null 2>&1 || : - fi + export FANOUT=$(get_node_count "${rnodes//,/ }") + if $VERBOSE && [[ -z "$quiet" ]]; then + echo "CMD: $rnodes $@" >&2 + $myPDSH $rnodes "$LCTL mark \"$@\"" > /dev/null 2>&1 || : + fi - # do not replace anything from pdsh output if -N is used - # -N Disable hostname: prefix on lines of output. - if $verbose || [[ $myPDSH = *-N* ]]; then - $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")" - else - $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")" | sed -re "s/^[^:]*: //g" - fi - return ${PIPESTATUS[0]} + # do not replace anything from pdsh output if -N is used + # -N Disable hostname: prefix on lines of output. 
+ if [[ -n "$verbose" || $myPDSH = *-N* ]]; then + $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) bash -c \"$@\")" + else + $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) bash -c \"$@\")" | sed -re "s/^[^:]*: //g" + fi + return ${PIPESTATUS[0]} } ## @@ -4085,11 +4289,18 @@ do_nodes() { # # usage: do_facet $facet command [arg ...] do_facet() { + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + local facet=$1 shift - local HOST=$(facet_active_host $facet) - [ -z $HOST ] && echo "No host defined for facet ${facet}" && exit 1 - do_node $HOST "$@" + local host=$(facet_active_host $facet) + + [ -z "$host" ] && echo "No host defined for facet ${facet}" && exit 1 + do_node $verbose $quiet $host "$@" } # Function: do_facet_random_file $FACET $FILE $SIZE @@ -4112,7 +4323,7 @@ do_facet_create_file() { } do_nodesv() { - do_nodes --verbose "$@" + do_nodes --verbose "$@" } add() { @@ -4122,6 +4333,12 @@ add() { stop ${facet} -f rm -f $TMP/${facet}active [[ $facet = mds1 ]] && combined_mgs_mds && rm -f $TMP/mgsactive + + # make sure in-tree ldiskfs is loaded before mkfs + if local_mode && [[ $(node_fstypes $HOSTNAME) == *ldiskfs* ]]; then + load_module ../ldiskfs/ldiskfs + fi + do_facet ${facet} $MKFS $* || return ${PIPESTATUS[0]} if [[ $(facet_fstype $facet) == zfs ]]; then @@ -4535,14 +4752,11 @@ mkfs_opts() { opts+=${LDLM_TIMEOUT:+" --param=sys.ldlm_timeout=$LDLM_TIMEOUT"} if [ $type == MDS ]; then - opts+=${MDSCAPA:+" --param-mdt.capa=$MDSCAPA"} opts+=${DEF_STRIPE_SIZE:+" --param=lov.stripesize=$DEF_STRIPE_SIZE"} opts+=${DEF_STRIPE_COUNT:+" --param=lov.stripecount=$DEF_STRIPE_COUNT"} opts+=${L_GETIDENTITY:+" --param=mdt.identity_upcall=$L_GETIDENTITY"} if [ $fstype == ldiskfs ]; then - fs_mkfs_opts+="-O ea_inode,large_dir" - var=${facet}_JRN if [ -n "${!var}" ]; then fs_mkfs_opts+=" -J device=${!var}" @@ -4554,8 +4768,6 @@ mkfs_opts() { fi if [ $type == OST ]; then - opts+=${OSSCAPA:+" --param=ost.capa=$OSSCAPA"} - if [ $fstype == ldiskfs ]; then var=${facet}_JRN if [ -n "${!var}" ]; then @@ -4579,6 +4791,8 @@ mkfs_opts() { var=${type}_FS_MKFS_OPTS fs_mkfs_opts+=${!var:+" ${!var}"} + [[ "$QUOTA_TYPE" =~ "p" ]] && fs_mkfs_opts+=" -O project" + [ $fstype == ldiskfs ] && fs_mkfs_opts=$(squash_opt $fs_mkfs_opts) if [ -n "${fs_mkfs_opts## }" ]; then @@ -5021,7 +5235,14 @@ init_facet_vars () { local varname=${facet}failover_HOST if [ -z "${!varname}" ]; then - eval export $varname=$(facet_host $facet) + local temp + if combined_mgs_mds && [ $facet == "mgs" ] && + [ -n "$mds1failover_HOST" ]; then + temp=$mds1failover_HOST + else + temp=$(facet_host $facet) + fi + eval export $varname=$temp fi varname=${facet}_HOST @@ -5054,12 +5275,12 @@ init_facets_vars () { if ! remote_mds_nodsh; then for num in $(seq $MDSCOUNT); do - DEVNAME=`mdsdevname $num` + DEVNAME=$(mdsdevname $num) init_facet_vars mds$num $DEVNAME $MDS_MOUNT_OPTS done fi - combined_mgs_mds || init_facet_vars mgs $(mgsdevname) $MGS_MOUNT_OPTS + init_facet_vars mgs $(mgsdevname) $MGS_MOUNT_OPTS if ! 
remote_ost_nodsh; then for num in $(seq $OSTCOUNT); do @@ -5188,6 +5409,11 @@ init_param_vars () { TIMEOUT=$(do_facet $SINGLEMDS "lctl get_param -n timeout") log "Using TIMEOUT=$TIMEOUT" + # tune down to speed up testing on (usually) small setups + local mgc_timeout=/sys/module/mgc/parameters/mgc_requeue_timeout_min + do_nodes $(comma_list $(nodes_list)) \ + "[ -f $mgc_timeout ] && echo 1 > $mgc_timeout; exit 0" + osc_ensure_active $SINGLEMDS $TIMEOUT osc_ensure_active client $TIMEOUT $LCTL set_param osc.*.idle_timeout=debug @@ -5216,6 +5442,9 @@ init_param_vars () { # $LFS quotaoff -ug $MOUNT > /dev/null 2>&1 fi fi + + (( MDS1_VERSION <= $(version_code 2.13.52) )) || + do_facet mgs "$LCTL set_param -P lod.*.mdt_hash=crush" return 0 } @@ -5303,25 +5532,103 @@ is_mounted () { echo $mounted' ' | grep -w -q $mntpt' ' } -is_empty_dir() { - [ $(find $1 -maxdepth 1 -print | wc -l) = 1 ] && return 0 - return 1 +create_pools () { + local pool=$1 + local ostsn=${2:-$OSTCOUNT} + local npools=${FS_NPOOLS:-$((OSTCOUNT / ostsn))} + local n + + echo ostsn=$ostsn npools=$npools + if [[ $ostsn -gt $OSTCOUNT ]]; then + echo "request to use $ostsn OSTs in the pool, \ + using max available OSTCOUNT=$OSTCOUNT" + ostsn=$OSTCOUNT + fi + for (( n=0; n < $npools; n++ )); do + p=${pool}$n + if ! $DELETE_OLD_POOLS; then + log "request to not delete old pools: $FSNAME.$p exist?" + if ! check_pool_not_exist $FSNAME.$p; then + echo "Using existing $FSNAME.$p" + $LCTL pool_list $FSNAME.$p + continue + fi + fi + create_pool $FSNAME.$p $KEEP_POOLS || + error "create_pool $FSNAME.$p failed" + + local first=$(( (n * ostsn) % OSTCOUNT )) + local last=$(( (first + ostsn - 1) % OSTCOUNT )) + if [[ $first -le $last ]]; then + pool_add_targets $p $first $last || + error "pool_add_targets $p $first $last failed" + else + pool_add_targets $p $first $(( OSTCOUNT - 1 )) || + error "pool_add_targets $p $first \ + $(( OSTCOUNT - 1 )) failed" + pool_add_targets $p 0 $last || + error "pool_add_targets $p 0 $last failed" + fi + done } -# empty lustre filesystem may have empty directories lost+found and .lustre -is_empty_fs() { - # exclude .lustre & lost+found - [ $(find $1 -maxdepth 1 -name lost+found -o -name .lustre -prune -o \ - -print | wc -l) = 1 ] || return 1 - [ ! -d $1/lost+found ] || is_empty_dir $1/lost+found || return 1 - if [ $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.4.0) ]; then - # exclude .lustre/fid (LU-2780) - [ $(find $1/.lustre -maxdepth 1 -name fid -prune -o \ - -print | wc -l) = 1 ] || return 1 - else - [ ! -d $1/.lustre ] || is_empty_dir $1/.lustre || return 1 - fi - return 0 +set_pools_quota () { + local u + local o + local p + local i + local j + + [[ $ENABLE_QUOTA ]] || error "Required Pool Quotas: \ + $POOLS_QUOTA_USERS_SET, but ENABLE_QUOTA not set!" 
+ + # POOLS_QUOTA_USERS_SET= + # "quota15_1:20M -- for all of the found pools + # quota15_2:1G:gpool0 + # quota15_3 -- for global limit only + # quota15_4:200M:gpool0 + # quota15_4:200M:gpool1" + + declare -a pq_userset=(${POOLS_QUOTA_USERS_SET="mpiuser"}) + declare -a pq_users + declare -A pq_limits + + for ((i=0; i<${#pq_userset[@]}; i++)); do + u=${pq_userset[i]%%:*} + o="" + # user gets no pool limits if + # POOLS_QUOTA_USERS_SET does not specify it + [[ ${pq_userset[i]} =~ : ]] && o=${pq_userset[i]##$u:} + pq_limits[$u]+=" $o" + done + pq_users=(${!pq_limits[@]}) + + declare -a opts + local pool + + for ((i=0; i<${#pq_users[@]}; i++)); do + u=${pq_users[i]} + # set to max limit (_u64) + $LFS setquota -u $u -B $((2**24 - 1))T $DIR + opts=(${pq_limits[$u]}) + for ((j=0; j<${#opts[@]}; j++)); do + p=${opts[j]##*:} + o=${opts[j]%%:*} + # Set limit for all existing pools if + # no pool specified + if [ $p == $o ]; then + p=$(list_pool $FSNAME | sed "s/$FSNAME.//") + echo "No pool specified for $u, + set limit $o for all existing pools" + fi + for pool in $p; do + $LFS setquota -u $u -B $o --pool $pool $DIR || + error "setquota -u $u -B $o \ + --pool $pool failed" + done + done + $LFS quota -uv $u --pool $DIR + done } check_and_setup_lustre() { @@ -5372,17 +5679,18 @@ check_and_setup_lustre() { export I_MOUNTED2=yes fi - if $do_check; then - # FIXME: what to do if check_config failed? - # i.e. if: - # 1) remote client has mounted other Lustre fs? - # 2) lustre is mounted on remote_clients atall ? - check_config_clients $MOUNT - init_facets_vars - init_param_vars + if $do_check; then + # FIXME: what to do if check_config failed? + # i.e. if: + # 1) remote client has mounted other Lustre fs? + # 2) lustre is mounted on remote_clients atall ? + check_config_clients $MOUNT + init_facets_vars + init_param_vars - set_default_debug_nodes $(comma_list $(nodes_list)) - fi + set_default_debug_nodes $(comma_list $(nodes_list)) + set_params_clients + fi if [ -z "$CLIENTONLY" -a $(lower $OSD_TRACK_DECLARES_LBUG) == 'yes' ]; then local facets="" @@ -5399,12 +5707,25 @@ check_and_setup_lustre() { fi fi + if [ -n "$fs_STRIPEPARAMS" ]; then + setstripe_getstripe $MOUNT $fs_STRIPEPARAMS + fi if $GSS_SK; then set_flavor_all null elif $GSS; then set_flavor_all $SEC fi + if $DELETE_OLD_POOLS; then + destroy_all_pools + fi + if [[ -n "$FS_POOL" ]]; then + create_pools $FS_POOL $FS_POOL_NOSTS + fi + + if [[ -n "$POOLS_QUOTA_USERS_SET" ]]; then + set_pools_quota + fi if [ "$ONLY" == "setup" ]; then exit 0 fi @@ -5435,51 +5756,6 @@ cleanup_and_setup_lustre() { check_and_setup_lustre } -# Get all of the server target devices from a given server node and type. -get_mnt_devs() { - local node=$1 - local type=$2 - local devs - local dev - - if [ "$type" == ost ]; then - devs=$(get_osd_param $node "" mntdev) - else - devs=$(do_node $node $LCTL get_param -n osd-*.$FSNAME-M*.mntdev) - fi - for dev in $devs; do - case $dev in - *loop*) do_node $node "losetup $dev" | \ - sed -e "s/.*(//" -e "s/).*//" ;; - *) echo $dev ;; - esac - done -} - -# Get all of the server target devices. 
-get_svr_devs() { - local node - local i - - # Master MDS parameters used by lfsck - MDTNODE=$(facet_active_host $SINGLEMDS) - MDTDEV=$(echo $(get_mnt_devs $MDTNODE mdt) | awk '{print $1}') - - # MDT devices - i=0 - for node in $(mdts_nodes); do - MDTDEVS[i]=$(get_mnt_devs $node mdt) - i=$((i + 1)) - done - - # OST devices - i=0 - for node in $(osts_nodes); do - OSTDEVS[i]=$(get_mnt_devs $node ost) - i=$((i + 1)) - done -} - # Run e2fsck on MDT or OST device. run_e2fsck() { local node=$1 @@ -5489,6 +5765,8 @@ run_e2fsck() { local log=$TMP/e2fsck.log local rc=0 + # turn on pfsck if it is supported + do_node $node $E2FSCK -h 2>&1 | grep -qw -- -m && cmd+=" -m8" echo $cmd do_node $node $cmd 2>&1 | tee $log rc=${PIPESTATUS[0]} @@ -5649,30 +5927,30 @@ check_and_cleanup_lustre() { # General functions wait_for_function () { - local quiet="" + local quiet="" - # suppress fn both stderr and stdout - if [ "$1" = "--quiet" ]; then - shift - quiet=" > /dev/null 2>&1" - - fi + # suppress fn both stderr and stdout + if [ "$1" = "--quiet" ]; then + shift + quiet=" > /dev/null 2>&1" + fi - local fn=$1 - local max=${2:-900} - local sleep=${3:-5} + local fn=$1 + local max=${2:-900} + local sleep=${3:-5} - local wait=0 + local wait=0 - while true; do + while true; do - eval $fn $quiet && return 0 + eval $fn $quiet && return 0 - wait=$((wait + sleep)) - [ $wait -lt $max ] || return 1 - echo waiting $fn, $((max - wait)) secs left ... - sleep $sleep - done + [ $wait -lt $max ] || return 1 + echo waiting $fn, $((max - wait)) secs left ... + wait=$((wait + sleep)) + [ $wait -gt $max ] && ((sleep -= wait - max)) + sleep $sleep + done } check_network() { @@ -5682,13 +5960,10 @@ check_network() { [ "$host" = "$HOSTNAME" ] && return 0 - echo "$(date +'%H:%M:%S (%s)') waiting for $host network $max secs ..." - if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep ; then - echo "Network not available!" + if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep; then + echo "$(date +'%H:%M:%S (%s)') waited for $host network ${max}s" exit 1 fi - - echo "$(date +'%H:%M:%S (%s)') network interface is UP" } no_dsh() { @@ -5806,10 +6081,6 @@ at_max_get() { at_get $1 at_max } -at_min_get() { - at_get $1 at_min -} - at_max_set() { local at_max=$1 shift @@ -5965,9 +6236,9 @@ cancel_lru_locks() { default_lru_size() { - NR_CPU=$(grep -c "processor" /proc/cpuinfo) - DEFAULT_LRU_SIZE=$((100 * NR_CPU)) - echo "$DEFAULT_LRU_SIZE" + local nr_cpu=$(grep -c "processor" /proc/cpuinfo) + + echo $((100 * nr_cpu)) } lru_resize_enable() @@ -5977,7 +6248,10 @@ lru_resize_enable() lru_resize_disable() { - lctl set_param ldlm.namespaces.*$1*.lru_size $(default_lru_size) + local dev=${1} + local lru_size=${2:-$(default_lru_size)} + + $LCTL set_param ldlm.namespaces.*$dev*.lru_size=$lru_size } flock_is_enabled() @@ -6132,9 +6406,14 @@ error_noexit() { exit_status () { local status=0 - local log=$TESTSUITELOG + local logs="$TESTSUITELOG $1" + + for log in $logs; do + if [ -f "$log" ]; then + grep -qw FAIL $log && status=1 + fi + done - [ -f "$log" ] && grep -qw FAIL $log && status=1 exit $status } @@ -6207,6 +6486,37 @@ skip() { exit 0 } +# +# For interop testing treate EOPNOTSUPP as success +# and skip +# +skip_eopnotsupp() { + local retstr=$@ + + echo $retstr | awk -F'|' '{print $1}' | + grep -E unsupported\|"(Operation not supported)" + (( $? == 0 )) || error "$retstr" + skip $retstr +} + +# Add a list of tests to ALWAYS_EXCEPT due to an issue. +# Usage: always_except LU-4815 23 42q ... 
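# Illustrative calls for always_except() below (the issue ID and test numbers
# are invented for the example, not taken from this change):
#   always_except LU-12345 101 102b   # appends " 101 102b" to ALWAYS_EXCEPT
#   always_except bug12345 103        # rejected: the issue must look like "LU-NNNN"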
+# +function always_except() { + local issue="${1:-}" # single jira style issue ("LU-4815") + local test_num + + shift + + if ! [[ "$issue" =~ ^[[:upper:]]+-[[:digit:]]+$ ]]; then + error "always_except: invalid issue '$issue' for tests '$*'" + fi + + for test_num in "$@"; do + ALWAYS_EXCEPT+=" $test_num" + done +} + build_test_filter() { EXCEPT="$EXCEPT $(testslist_filter)" @@ -6220,22 +6530,22 @@ build_test_filter() { fi done - [ "$EXCEPT$ALWAYS_EXCEPT" ] && \ - log "excepting tests: `echo $EXCEPT $ALWAYS_EXCEPT`" - [ "$EXCEPT_SLOW" ] && \ - log "skipping tests SLOW=no: `echo $EXCEPT_SLOW`" - for E in $EXCEPT; do - eval EXCEPT_${E}=true - done - for E in $ALWAYS_EXCEPT; do - eval EXCEPT_ALWAYS_${E}=true - done - for E in $EXCEPT_SLOW; do - eval EXCEPT_SLOW_${E}=true - done - for G in $GRANT_CHECK_LIST; do - eval GCHECK_ONLY_${G}=true - done + [ "$EXCEPT$ALWAYS_EXCEPT" ] && + log "excepting tests: `echo $EXCEPT $ALWAYS_EXCEPT`" + [ "$EXCEPT_SLOW" ] && + log "skipping tests SLOW=no: `echo $EXCEPT_SLOW`" + for E in $EXCEPT; do + eval EXCEPT_${E}=true + done + for E in $ALWAYS_EXCEPT; do + eval EXCEPT_ALWAYS_${E}=true + done + for E in $EXCEPT_SLOW; do + eval EXCEPT_SLOW_${E}=true + done + for G in $GRANT_CHECK_LIST; do + eval GCHECK_ONLY_${G}=true + done } basetest() { @@ -6391,10 +6701,10 @@ check_mds() { } reset_fail_loc () { - echo -n "Resetting fail_loc on all nodes..." - do_nodes $(comma_list $(nodes_list)) "lctl set_param -n fail_loc=0 \ - fail_val=0 2>/dev/null" || true - echo done. + #echo -n "Resetting fail_loc on all nodes..." + do_nodes --quiet $(comma_list $(nodes_list)) \ + "lctl set_param -n fail_loc=0 fail_val=0 2>/dev/null" || true + #echo done. } @@ -6403,7 +6713,8 @@ reset_fail_loc () { # Also appends a timestamp and prepends the testsuite name. 
# -EQUALS="====================================================================================================" +# ======================================================== 15:06:12 (1624050372) +EQUALS="========================================================" banner() { msg="== ${TESTSUITE} $*" last=${msg: -1:1} @@ -6419,7 +6730,7 @@ check_dmesg_for_errors() { ldiskfs_check_descriptors: Checksum for group 0 failed\|\ group descriptors corrupted" - res=$(do_nodes $(comma_list $(nodes_list)) "dmesg" | grep "$errors") + res=$(do_nodes -q $(comma_list $(nodes_list)) "dmesg" | grep "$errors") [ -z "$res" ] && return 0 echo "Kernel error detected: $res" return 1 @@ -6469,10 +6780,8 @@ run_one_logged() { local testmsg=$2 export tfile=f${testnum}.${TESTSUITE} export tdir=d${testnum}.${TESTSUITE} - local name=$TESTSUITE.$TESTNAME.test_log.$(hostname -s).log - local test_log=$LOGDIR/$name - local zfs_log_name=$TESTSUITE.$TESTNAME.zfs_log - local zfs_debug_log=$LOGDIR/$zfs_log_name + local test_log=$TESTLOG_PREFIX.$TESTNAME.test_log.$(hostname -s).log + local zfs_debug_log=$TESTLOG_PREFIX.$TESTNAME.zfs_log local SAVE_UMASK=$(umask) local rc=0 umask 0022 @@ -6489,7 +6798,7 @@ run_one_logged() { # remove temp files between repetitions to avoid test failures [ -n "$append" -a -n "$DIR" -a -n "$tdir" -a -n "$tfile" ] && - rm -rf $DIR/$tdir* $DIR/$tfile* + rm -rvf $DIR/$tdir* $DIR/$tfile* # loop around subshell so stack_trap EXIT triggers each time (run_one $testnum "$testmsg") 2>&1 | tee -i $append $test_log rc=${PIPESTATUS[0]} @@ -6512,7 +6821,7 @@ run_one_logged() { TEST_STATUS="PASS" fi - pass "$testnum" "($((SECONDS - before))s)" + pass "$testnum" "(${duration_sub}s)" log_sub_test_end $TEST_STATUS $duration_sub "$rc" "$test_error" [[ $rc != 0 ]] && break done @@ -6544,10 +6853,27 @@ skip_logged(){ log_sub_test_end "SKIP" "0" "0" "$@" } -canonical_path() { - (cd $(dirname $1); echo $PWD/$(basename $1)) +grant_from_clients() { + local nodes="$1" + + # get client grant + do_nodes $nodes "$LCTL get_param -n osc.${FSNAME}-*.cur_*grant_bytes" | + calc_sum } +grant_from_servers() { + local nodes="$1" + + # get server grant + # which is tot_granted less grant_precreate + do_nodes $nodes "$LCTL get_param obdfilter.${FSNAME}-OST*.tot_granted" \ + " obdfilter.${FSNAME}-OST*.tot_pending" \ + " obdfilter.${FSNAME}-OST*.grant_precreate" | + tr '=' ' ' | awk '/tot_granted/{ total += $2 }; + /tot_pending/{ total -= $2 }; + /grant_precreate/{ total -= $2 }; + END { printf("%0.0f", total) }' +} check_grant() { export base=$(basetest $1) @@ -6559,37 +6885,37 @@ check_grant() { echo -n "checking grant......" 
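# A minimal standalone sketch using the grant helpers above, under the same
# assumptions as check_grant() (variable names are illustrative):
#   cli=$(grant_from_clients ${CLIENTS:-$HOSTNAME})
#   srv=$(grant_from_servers $(comma_list $(osts_nodes)))
#   (( cli == srv )) || echo "grant mismatch: client=$cli server=$srv"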
+ local osts=$(comma_list $(osts_nodes)) local clients=$CLIENTS [ -z "$clients" ] && clients=$(hostname) # sync all the data and make sure no pending data on server do_nodes $clients sync - clients_up # initiate all idling connections + do_nodes $clients $LFS df # initiate all idling connections # get client grant - client_grant=$(do_nodes $clients \ - "$LCTL get_param -n osc.${FSNAME}-*.cur_*grant_bytes" | - awk '{ total += $1 } END { printf("%0.0f", total) }') + cli_grant=$(grant_from_clients $clients) # get server grant # which is tot_granted less grant_precreate - server_grant=$(do_nodes $(comma_list $(osts_nodes)) \ - "$LCTL get_param "\ - "obdfilter.${FSNAME}-OST*.{tot_granted,tot_pending,grant_precreate}" | - sed 's/=/ /'| awk '/tot_granted/{ total += $2 }; - /tot_pending/{ total -= $2 }; - /grant_precreate/{ total -= $2 }; - END { printf("%0.0f", total) }') + srv_grant=$(grant_from_servers $osts) + count=0 # check whether client grant == server grant - if [[ $client_grant -ne $server_grant ]]; then + while [[ $cli_grant != $srv_grant && count++ -lt 30 ]]; do + echo "wait for client:$cli_grant == server:$srv_grant" + sleep 1 + cli_grant=$(grant_from_clients $clients) + srv_grant=$(grant_from_servers $osts) + done + if [[ $cli_grant -ne $srv_grant ]]; then do_nodes $(comma_list $(osts_nodes)) \ "$LCTL get_param obdfilter.${FSNAME}-OST*.tot*" \ - "obdfilter.${FSNAME}-OST*.grant_*" + "obdfilter.${FSNAME}-OST*.grant_*" do_nodes $clients "$LCTL get_param osc.${FSNAME}-*.cur_*_bytes" - error "failed: client:${client_grant} server: ${server_grant}." + error "failed grant check: client:$cli_grant server:$srv_grant" else - echo "pass: client:${client_grant} server: ${server_grant}" + echo "pass grant check: client:$cli_grant server:$srv_grant" fi } @@ -6612,7 +6938,7 @@ ostuuid_from_index() } ostname_from_index() { - local uuid=$(ostuuid_from_index $1) + local uuid=$(ostuuid_from_index $1 $2) echo ${uuid/_UUID/} } @@ -6646,7 +6972,34 @@ host_id() { # Description: # Returns list of ip addresses for each interface local_addr_list() { - ip addr | awk '/inet\ / {print $2}' | awk -F\/ '{print $1}' + ip addr | awk '/inet / {print $2}' | awk -F/ '{print $1}' +} + +# Description: +# Returns list of interfaces configured for LNet +lnet_if_list() { + local nids=( $($LCTL list_nids | xargs echo) ) + + [[ -z ${nids[@]} ]] && + return 0 + + declare -a INTERFACES + + for ((i = 0; i < ${#nids[@]}; i++)); do + ip=$(sed 's/^\(.*\)@.*$/\1/'<<<${nids[i]}) + INTERFACES[i]=$(ip -o a s | + awk '$4 ~ /^'$ip'\//{print $2}') + INTERFACES=($(echo "${INTERFACES[@]}" | tr ' ' '\n' | uniq | tr '\n' ' ')) + if [[ -z ${INTERFACES[i]} ]]; then + error "Can't determine interface name for NID ${nids[i]}" + elif [[ 1 -ne $(wc -w <<<${INTERFACES[i]}) ]]; then + error "Found $(wc -w <<<${INTERFACES[i]}) interfaces for NID ${nids[i]}. Expect 1" + fi + done + + echo "${INTERFACES[@]}" + + return 0 } is_local_addr() { @@ -6770,11 +7123,6 @@ osts_nodes () { echo -n $(facets_nodes $(get_facets OST)) } -# Get all of the active AGT (HSM agent) nodes. -agts_nodes () { - echo -n $(facets_nodes $(get_facets AGT)) -} - # Get all of the client nodes and active server nodes. 
nodes_list () { local nodes=$HOSTNAME @@ -6872,20 +7220,23 @@ init_clients_lists () { # Sanity check: exclude the dup entries RCLIENTS=$(for i in ${rclients//,/ }; do echo $i; done | sort -u) - clients="$SINGLECLIENT $HOSTNAME $RCLIENTS" + export CLIENT1=${CLIENT1:-$HOSTNAME} + export SINGLECLIENT=$CLIENT1 + + clients="$SINGLECLIENT $HOSTNAME $RCLIENTS" # Sanity check: exclude the dup entries from CLIENTS # for those configs which has SINGLCLIENT set to local client clients=$(for i in $clients; do echo $i; done | sort -u) - CLIENTS=$(comma_list $clients) + export CLIENTS=$(comma_list $clients) local -a remoteclients=($RCLIENTS) for ((i=0; $i<${#remoteclients[@]}; i++)); do varname=CLIENT$((i + 2)) - eval $varname=${remoteclients[i]} + eval export $varname=${remoteclients[i]} done - CLIENTCOUNT=$((${#remoteclients[@]} + 1)) + export CLIENTCOUNT=$((${#remoteclients[@]} + 1)) } get_random_entry () { @@ -6914,12 +7265,6 @@ get_node_count() { echo $nodes | wc -w || true } -mixed_ost_devs () { - local nodes=$(osts_nodes) - local osscount=$(get_node_count "$nodes") - [ ! "$OSTCOUNT" = "$osscount" ] -} - mixed_mdt_devs () { local nodes=$(mdts_nodes) local mdtcount=$(get_node_count "$nodes") @@ -6944,23 +7289,6 @@ get_stripe () { rm -f $file } -setstripe_nfsserver () { - local dir=$1 - local nfsexportdir=$2 - shift - shift - - local -a nfsexport=($(awk '"'$dir'" ~ $2 && $3 ~ "nfs" && $2 != "/" \ - { print $1 }' /proc/mounts | cut -f 1 -d :)) - - # check that only one nfs mounted - [[ -z $nfsexport ]] && echo "$dir is not nfs mounted" && return 1 - (( ${#nfsexport[@]} == 1 )) || - error "several nfs mounts found for $dir: ${nfsexport[@]} !" - - do_nodev ${nfsexport[0]} lfs setstripe $nfsexportdir "$@" -} - # Check and add a test group. add_group() { local group_id=$1 @@ -7019,35 +7347,46 @@ add_user() { } check_runas_id_ret() { - local myRC=0 - local myRUNAS_UID=$1 - local myRUNAS_GID=$2 - shift 2 - local myRUNAS=$@ - if [ -z "$myRUNAS" ]; then - error_exit "myRUNAS command must be specified for check_runas_id" - fi - if $GSS_KRB5; then - $myRUNAS krb5_login.sh || \ - error "Failed to refresh Kerberos V5 TGT for UID $myRUNAS_ID." - fi - mkdir $DIR/d0_runas_test - chmod 0755 $DIR - chown $myRUNAS_UID:$myRUNAS_GID $DIR/d0_runas_test - $myRUNAS touch $DIR/d0_runas_test/f$$ || myRC=$? - rm -rf $DIR/d0_runas_test - return $myRC + local myRC=0 + local myRUNAS_UID=$1 + local myRUNAS_GID=$2 + shift 2 + local myRUNAS=$@ + + if [ -z "$myRUNAS" ]; then + error_exit "check_runas_id_ret requires myRUNAS argument" + fi + + $myRUNAS true || + error "Unable to execute $myRUNAS" + + id $myRUNAS_UID > /dev/null || + error "Invalid RUNAS_ID $myRUNAS_UID. Please set RUNAS_ID to " \ + "some UID which exists on MDS and client or add user " \ + "$myRUNAS_UID:$myRUNAS_GID on these nodes." + + if $GSS_KRB5; then + $myRUNAS krb5_login.sh || + error "Failed to refresh krb5 TGT for UID $myRUNAS_ID." + fi + mkdir $DIR/d0_runas_test + chmod 0755 $DIR + chown $myRUNAS_UID:$myRUNAS_GID $DIR/d0_runas_test + $myRUNAS -u $myRUNAS_UID -g $myRUNAS_GID touch $DIR/d0_runas_test/f$$ || + myRC=$? + rm -rf $DIR/d0_runas_test + return $myRC } check_runas_id() { - local myRUNAS_UID=$1 - local myRUNAS_GID=$2 - shift 2 - local myRUNAS=$@ - check_runas_id_ret $myRUNAS_UID $myRUNAS_GID $myRUNAS || \ - error "unable to write to $DIR/d0_runas_test as UID $myRUNAS_UID. - Please set RUNAS_ID to some UID which exists on MDS and client or - add user $myRUNAS_UID:$myRUNAS_GID on these nodes." 
+ local myRUNAS_UID=$1 + local myRUNAS_GID=$2 + shift 2 + local myRUNAS=$@ + + check_runas_id_ret $myRUNAS_UID $myRUNAS_GID $myRUNAS || \ + error "unable to write to $DIR/d0_runas_test as " \ + "UID $myRUNAS_UID." } # obtain the UID/GID for MPI_USER @@ -7109,16 +7448,16 @@ multiop_bg_pause() { } do_and_time () { - local cmd=$1 - local rc - - SECONDS=0 - eval '$cmd' + local cmd="$1" + local start + local rc - [ ${PIPESTATUS[0]} -eq 0 ] || rc=1 + start=$SECONDS + eval '$cmd' + [ ${PIPESTATUS[0]} -eq 0 ] || rc=1 - echo $SECONDS - return $rc + echo $((SECONDS - start)) + return $rc } inodes_available () { @@ -7155,6 +7494,29 @@ calc_osc_kbytes () { $LCTL get_param -n osc.*[oO][sS][cC][-_][0-9a-f]*.$1 | calc_sum } +free_min_max () { + wait_delete_completed + AVAIL=($(lctl get_param -n osc.*[oO][sS][cC]-[^M]*.kbytesavail)) + echo "OST kbytes available: ${AVAIL[*]}" + MAXV=${AVAIL[0]} + MAXI=0 + MINV=${AVAIL[0]} + MINI=0 + for ((i = 0; i < ${#AVAIL[@]}; i++)); do + #echo OST $i: ${AVAIL[i]}kb + if [[ ${AVAIL[i]} -gt $MAXV ]]; then + MAXV=${AVAIL[i]} + MAXI=$i + fi + if [[ ${AVAIL[i]} -lt $MINV ]]; then + MINV=${AVAIL[i]} + MINI=$i + fi + done + echo "Min free space: OST $MINI: $MINV" + echo "Max free space: OST $MAXI: $MAXV" +} + # save_lustre_params(comma separated facet list, parameter_mask) # generate a stream of formatted strings ( =) save_lustre_params() { @@ -7187,19 +7549,18 @@ restore_lustre_params() { check_node_health() { local nodes=${1:-$(comma_list $(nodes_list))} - - for node in ${nodes//,/ }; do - check_network "$node" 5 - if [ $? -eq 0 ]; then - do_node $node "rc=0; - val=\\\$($LCTL get_param -n catastrophe 2>&1); - if [[ \\\$? -eq 0 && \\\$val -ne 0 ]]; then - echo \\\$(hostname -s): \\\$val; - rc=\\\$val; - fi; - exit \\\$rc" || error "$node:LBUG/LASSERT detected" - fi - done + local health=$TMP/node_health.$$ + + do_nodes -q $nodes "$LCTL get_param catastrophe 2>&1" | tee $health | + grep "catastrophe=1" && error "LBUG/LASSERT detected" + # Only check/report network health if get_param isn't reported, since + # *clearly* the network is working if get_param returned something. + if (( $(grep -c catastro $health) != $(wc -w <<< ${nodes//,/ }) )); then + for node in ${nodes//,/}; do + check_network $node 5 + done + fi + rm -f $health } mdsrate_cleanup () { @@ -7210,11 +7571,6 @@ mdsrate_cleanup () { fi } -delayed_recovery_enabled () { - local var=${SINGLEMDS}_svc - do_facet $SINGLEMDS lctl get_param -n mdd.${!var}.stale_export_age > /dev/null 2>&1 -} - ######################## convert_facet2label() { @@ -7239,20 +7595,6 @@ get_clientosc_proc_path() { echo "${1}-osc-[-0-9a-f]*" } -# If the 2.0 MDS was mounted on 1.8 device, then the OSC and LOV names -# used by MDT would not be changed. 
-# mdt lov: fsname-mdtlov -# mdt osc: fsname-OSTXXXX-osc -mds_on_old_device() { - local mds=${1:-"$SINGLEMDS"} - - if [ $(lustre_version_code $mds) -gt $(version_code 1.9.0) ]; then - do_facet $mds "lctl list_param osc.$FSNAME-OST*-osc \ - > /dev/null 2>&1" && return 0 - fi - return 1 -} - get_mdtosc_proc_path() { local mds_facet=$1 local ost_label=${2:-"*OST*"} @@ -7283,48 +7625,49 @@ get_osc_import_name() { } _wait_import_state () { - local expected=$1 - local CONN_PROC=$2 - local maxtime=${3:-$(max_recovery_time)} - local error_on_failure=${4:-1} - local CONN_STATE - local i=0 + local expected="$1" + local CONN_PROC="$2" + local maxtime=${3:-$(max_recovery_time)} + local err_on_fail=${4:-1} + local CONN_STATE + local i=0 CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq) - while ! echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do - if [ "${expected}" == "DISCONN" ]; then - # for disconn we can check after proc entry is removed - [ "x${CONN_STATE}" == "x" ] && return 0 - # with AT enabled, we can have connect request timeout near of - # reconnect timeout and test can't see real disconnect - [ "${CONN_STATE}" == "CONNECTING" ] && return 0 - fi - if [ $i -ge $maxtime ]; then - [ $error_on_failure -ne 0 ] && \ - error "can't put import for $CONN_PROC into ${expected}" \ - "state after $i sec, have ${CONN_STATE}" - return 1 - fi - sleep 1 - # Add uniq for multi-mount case - CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq) - i=$(($i + 1)) - done + while ! echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do + if [[ "${expected}" == "DISCONN" ]]; then + # for disconn we can check after proc entry is removed + [[ -z "${CONN_STATE}" ]] && return 0 + # with AT, we can have connect request timeout near + # reconnect timeout and test can't see real disconnect + [[ "${CONN_STATE}" == "CONNECTING" ]] && return 0 + fi + if (( $i >= $maxtime )); then + (( $err_on_fail != 0 )) && + error "can't put import for $CONN_PROC into ${expected} state after $i sec, have ${CONN_STATE}" + return 1 + fi + sleep 1 + # Add uniq for multi-mount case + CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | + cut -f2 | uniq) + i=$((i + 1)) + done - log "$CONN_PROC in ${CONN_STATE} state after $i sec" - return 0 + log "$CONN_PROC in ${CONN_STATE} state after $i sec" + return 0 } wait_import_state() { - local state=$1 - local params=$2 - local maxtime=${3:-$(max_recovery_time)} - local error_on_failure=${4:-1} - local param - - for param in ${params//,/ }; do - _wait_import_state $state $param $maxtime $error_on_failure || return - done + local expected="$1" + local params="$2" + local maxtime=${3:-$(max_recovery_time)} + local err_on_fail=${4:-1} + local param + + for param in ${params//,/ }; do + _wait_import_state "$expected" "$param" $maxtime $err_on_fail || + return + done } wait_import_state_mount() { @@ -7332,7 +7675,7 @@ wait_import_state_mount() { return 0 fi - wait_import_state $* + wait_import_state "$@" } # One client request could be timed out because server was not ready @@ -7412,8 +7755,9 @@ _wait_osc_import_state() { params=$param fi + local plist=$(comma_list $params) if ! do_rpc_nodes "$(facet_active_host $facet)" \ - wait_import_state $expected "$params" $maxtime; then + wait_import_state $expected $plist $maxtime; then error "$facet: import is not in $expected state after $maxtime" return 1 fi @@ -7468,8 +7812,9 @@ _wait_mgc_import_state() { params=$($LCTL list_param $param 2>/dev/null || true) done fi + local plist=$(comma_list $params) if ! 
do_rpc_nodes "$(facet_active_host $facet)" \ - wait_import_state $expected "$params" $maxtime \ + wait_import_state $expected $plist $maxtime \ $error_on_failure; then if [ $error_on_failure -ne 0 ]; then error "import is not in ${expected} state" @@ -7516,6 +7861,10 @@ get_clientmgc_proc_path() { } do_rpc_nodes () { + local quiet + + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + local list=$1 shift @@ -7525,15 +7874,14 @@ do_rpc_nodes () { local LIBPATH="/usr/lib/lustre/tests:/usr/lib64/lustre/tests:" local TESTPATH="$RLUSTRE/tests:" local RPATH="PATH=${TESTPATH}${LIBPATH}${PATH}:/sbin:/bin:/usr/sbin:" - do_nodesv $list "${RPATH} NAME=${NAME} bash rpc.sh $@ " + do_nodes ${quiet:-"--verbose"} $list "${RPATH} NAME=${NAME} bash rpc.sh $@ " } wait_clients_import_state () { - local list=$1 - local facet=$2 - local expected=$3 - - local facets=$facet + local list="$1" + local facet="$2" + local expected="$3" + local facets="$facet" if [ "$FAILURE_MODE" = HARD ]; then facets=$(facets_on_host $(facet_active_host $facet)) @@ -7544,11 +7892,11 @@ wait_clients_import_state () { local proc_path case $facet in ost* ) proc_path="osc.$(get_clientosc_proc_path \ - $label).ost_server_uuid" ;; + $label).ost_server_uuid" ;; mds* ) proc_path="mdc.$(get_clientmdc_proc_path \ - $label).mds_server_uuid" ;; + $label).mds_server_uuid" ;; mgs* ) proc_path="mgc.$(get_clientmgc_proc_path \ - $label).mgs_server_uuid" ;; + $label).mgs_server_uuid" ;; *) error "unknown facet!" ;; esac @@ -7646,6 +7994,7 @@ check_pool_not_exist() { create_pool() { local fsname=${1%%.*} local poolname=${1##$fsname.} + local keep_pools=${2:-false} stack_trap "destroy_test_pools $fsname" EXIT do_facet mgs lctl pool_new $1 @@ -7664,7 +8013,7 @@ create_pool() { wait_update $HOSTNAME "lctl get_param -n lov.$fsname-*.pools.$poolname \ 2>/dev/null || echo foo" "" || error "pool_new failed $1" - add_pool_to_list $1 + $keep_pools || add_pool_to_list $1 return $RC } @@ -7682,10 +8031,18 @@ remove_pool_from_list () { local poolname=${1##$fsname.} local listvar=${fsname}_CREATED_POOLS - local temp=${listvar}=$(exclude_items_from_list ${!listvar} $poolname) + local temp=${listvar}=$(exclude_items_from_list "${!listvar}" $poolname) eval export $temp } +# cleanup all pools exist on $FSNAME +destroy_all_pools () { + local i + for i in $(list_pool $FSNAME); do + destroy_pool $i + done +} + destroy_pool_int() { local ost local OSTS=$(list_pool $1) @@ -7706,8 +8063,7 @@ destroy_pool() { local RC - check_pool_not_exist $fsname.$poolname - [[ $? -eq 0 ]] && return 0 + check_pool_not_exist $fsname.$poolname && return 0 || true destroy_pool_int $fsname.$poolname RC=$? @@ -7801,8 +8157,11 @@ gather_logs () { fi if [ ! -f $LOGDIR/shared ]; then - do_nodes $list rsync -az "${prefix}.*.${suffix}" \ - $HOSTNAME:$LOGDIR + local remote_nodes=$(exclude_items_from_list $list $HOSTNAME) + + for node in ${remote_nodes//,/ }; do + rsync -az -e ssh $node:${prefix}.'*'.${suffix} $LOGDIR & + done fi } @@ -8316,13 +8675,10 @@ init_logging() { umask $save_umask - # If modules are not yet loaded then older "lctl lustre_build_version" - # will fail. Use lctl build version instead. 
- log "Client: $($LCTL lustre_build_version)" - log "MDS: $(do_facet $SINGLEMDS $LCTL lustre_build_version 2>/dev/null|| - do_facet $SINGLEMDS $LCTL --version)" - log "OSS: $(do_facet ost1 $LCTL lustre_build_version 2> /dev/null || - do_facet ost1 $LCTL --version)" + # log actual client and server versions if needed for debugging + log "Client: $(lustre_build_version client)" + log "MDS: $(lustre_build_version mds1)" + log "OSS: $(lustre_build_version ost1)" } log_test() { @@ -8343,124 +8699,34 @@ log_sub_test_end() { run_llverdev() { - local dev=$1 - local llverdev_opts=$2 - local devname=$(basename $1) - local size=$(grep "$devname"$ /proc/partitions | awk '{print $3}') - # loop devices aren't in /proc/partitions - [ "x$size" == "x" ] && local size=$(ls -l $dev | awk '{print $5}') + local dev=$1; shift + local llverdev_opts="$*" + local devname=$(basename $dev) + local size=$(awk "/$devname$/ {print \$3}" /proc/partitions) + # loop devices aren't in /proc/partitions + [[ -z "$size" ]] && size=$(stat -c %s $dev) - size=$(($size / 1024 / 1024)) # Gb + local size_gb=$((size / 1024 / 1024)) # Gb - local partial_arg="" - # Run in partial (fast) mode if the size - # of a partition > 1 GB - [ $size -gt 1 ] && partial_arg="-p" + local partial_arg="" + # Run in partial (fast) mode if the size of a partition > 1 GB + (( $size == 0 || $size_gb > 1 )) && partial_arg="-p" - llverdev --force $partial_arg $llverdev_opts $dev + llverdev --force $partial_arg $llverdev_opts $dev } run_llverfs() { - local dir=$1 - local llverfs_opts=$2 - local use_partial_arg=$3 - local partial_arg="" - local size=$(df -B G $dir |tail -n 1 |awk '{print $2}' |sed 's/G//') #GB - - # Run in partial (fast) mode if the size - # of a partition > 1 GB - [ "x$use_partial_arg" != "xno" ] && [ $size -gt 1 ] && partial_arg="-p" - - llverfs $partial_arg $llverfs_opts $dir -} - -#Remove objects from OST -remove_ost_objects() { - local facet=$1 - local ostdev=$2 - local group=$3 - shift 3 - local objids="$@" - local mntpt=$(facet_mntpt $facet) - local opts=$OST_MOUNT_OPTS - local i - local rc - - echo "removing objects from $ostdev on $facet: $objids" - if ! test -b $ostdev; then - opts=$(csa_add "$opts" -o loop) - fi - mount -t $(facet_fstype $facet) $opts $ostdev $mntpt || - return $? - rc=0 - for i in $objids; do - rm $mntpt/O/$group/d$((i % 32))/$i || { rc=$?; break; } - done - umount -f $mntpt || return $? - return $rc -} - -#Remove files from MDT -remove_mdt_files() { - local facet=$1 - local mdtdev=$2 - shift 2 - local files="$@" - local mntpt=$(facet_mntpt $facet) - local opts=$MDS_MOUNT_OPTS - - echo "removing files from $mdtdev on $facet: $files" - if [ $(facet_fstype $facet) == ldiskfs ] && - ! do_facet $facet test -b $mdtdev; then - opts=$(csa_add "$opts" -o loop) - fi - mount -t $(facet_fstype $facet) $opts $mdtdev $mntpt || - return $? - rc=0 - for f in $files; do - rm $mntpt/ROOT/$f || { rc=$?; break; } - done - umount -f $mntpt || return $? - return $rc -} - -duplicate_mdt_files() { - local facet=$1 - local mdtdev=$2 - shift 2 - local files="$@" - local mntpt=$(facet_mntpt $facet) - local opts=$MDS_MOUNT_OPTS + local dir=$1 + local llverfs_opts=$2 + local use_partial_arg=$3 + local partial_arg="" + local size=$(df -B G $dir |tail -n 1 |awk '{print $2}' |sed 's/G//') #GB - echo "duplicating files on $mdtdev on $facet: $files" - mkdir -p $mntpt || return $? - if [ $(facet_fstype $facet) == ldiskfs ] && - ! 
do_facet $facet test -b $mdtdev; then - opts=$(csa_add "$opts" -o loop) - fi - mount -t $(facet_fstype $facet) $opts $mdtdev $mntpt || - return $? + # Run in partial (fast) mode if the size of a partition > 1 GB + [ "x$use_partial_arg" != "xno" ] && [ $size -gt 1 ] && partial_arg="-p" - do_umount() { - trap 0 - popd > /dev/null - rm $tmp - umount -f $mntpt - } - trap do_umount EXIT - - tmp=$(mktemp $TMP/setfattr.XXXXXXXXXX) - pushd $mntpt/ROOT > /dev/null || return $? - rc=0 - for f in $files; do - touch $f.bad || return $? - getfattr -n trusted.lov $f | sed "s#$f#&.bad#" > $tmp - rc=${PIPESTATUS[0]} - [ $rc -eq 0 ] || return $rc - setfattr --restore $tmp || return $? - done - do_umount + llverfs $partial_arg $llverfs_opts $dir } run_sgpdd () { @@ -8550,22 +8816,11 @@ get_block_count() { echo -n ${count:-0} } -# Get the block size of the filesystem. -get_block_size() { - local facet=$1 - local device=$2 - local size - - [ -z "$CLIENTONLY" ] && size=$(do_facet $facet "$DUMPE2FS -h $device 2>&1" | - awk '/^Block size:/ {print $3}') - echo -n ${size:-0} -} - # Check whether the "ea_inode" feature is enabled or not, to allow # ldiskfs xattrs over one block in size. Allow both the historical # Lustre feature name (large_xattr) and the upstream name (ea_inode). large_xattr_enabled() { - [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 1 + [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 0 local mds_dev=$(mdsdevname ${SINGLEMDS//mds/}) @@ -8614,7 +8869,6 @@ mds_backup_restore() { local devname=$(mdsdevname $(facet_number $facet)) local mntpt=$(facet_mntpt brpt) local rcmd="do_facet $facet" - local metaea=${TMP}/backup_restore.ea local metadata=${TMP}/backup_restore.tgz local opts=${MDS_MOUNT_FS_OPTS} local svc=${facet}_svc @@ -8628,41 +8882,36 @@ mds_backup_restore() { # step 1: build mount point ${rcmd} mkdir -p $mntpt # step 2: cleanup old backup - ${rcmd} rm -f $metaea $metadata + ${rcmd} rm -f $metadata # step 3: mount dev - ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 1 + ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 3 if [ ! -z $igif ]; then # step 3.5: rm .lustre - ${rcmd} rm -rf $mntpt/ROOT/.lustre || return 1 + ${rcmd} rm -rf $mntpt/ROOT/.lustre || return 3 fi - # step 4: backup metaea - echo "backup EA" - ${rcmd} "cd $mntpt && getfattr -R -d -m '.*' -P . > $metaea && cd -" || - return 2 - # step 5: backup metadata + # step 4: backup metadata echo "backup data" - ${rcmd} tar zcf $metadata -C $mntpt/ . > /dev/null 2>&1 || return 3 - # step 6: umount - ${rcmd} $UMOUNT $mntpt || return 4 - # step 8: reformat dev + ${rcmd} tar zcf $metadata --xattrs --xattrs-include="trusted.*" \ + --sparse -C $mntpt/ . 
> /dev/null 2>&1 || return 4 + # step 5: umount + ${rcmd} $UMOUNT $mntpt || return 5 + # step 6: reformat dev echo "reformat new device" format_mdt $(facet_number $facet) - # step 9: mount dev + # step 7: mount dev ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 7 - # step 10: restore metadata + # step 8: restore metadata echo "restore data" - ${rcmd} tar zxfp $metadata -C $mntpt > /dev/null 2>&1 || return 8 - # step 11: restore metaea - echo "restore EA" - ${rcmd} "cd $mntpt && setfattr --restore=$metaea && cd - " || return 9 - # step 12: remove recovery logs + ${rcmd} tar zxfp $metadata --xattrs --xattrs-include="trusted.*" \ + --sparse -C $mntpt > /dev/null 2>&1 || return 8 + # step 9: remove recovery logs echo "remove recovery logs" ${rcmd} rm -fv $mntpt/OBJECTS/* $mntpt/CATALOGS - # step 13: umount dev + # step 10: umount dev ${rcmd} $UMOUNT $mntpt || return 10 - # step 14: cleanup tmp backup + # step 11: cleanup tmp backup ${rcmd} rm -f $metaea $metadata - # step 15: reset device label - it's not virgin on + # step 12: reset device label - it's not virgin on ${rcmd} e2label $devname ${!svc} } @@ -8715,16 +8964,19 @@ generate_logname() { test_mkdir() { local path local p_option + local hash_type + local hash_name=("all_char" "fnv_1a_64" "crush") local dirstripe_count=${DIRSTRIPE_COUNT:-"2"} local dirstripe_index=${DIRSTRIPE_INDEX:-$((base % $MDSCOUNT))} local OPTIND=1 - while getopts "c:i:p" opt; do + while getopts "c:H:i:p" opt; do case $opt in c) dirstripe_count=$OPTARG;; + H) hash_type=$OPTARG;; i) dirstripe_index=$OPTARG;; p) p_option="-p";; - \?) error "only support -i -c -p";; + \?) error "only support -c -H -i -p";; esac done @@ -8732,9 +8984,8 @@ test_mkdir() { [ $# -eq 1 ] || error "Only creating single directory is supported" path="$*" + local parent=$(dirname $path) if [ "$p_option" == "-p" ]; then - local parent=$(dirname $path) - [ -d $path ] && return 0 if [ ! -d ${parent} ]; then mkdir -p ${parent} || @@ -8742,7 +8993,7 @@ test_mkdir() { fi fi - if [ $MDSCOUNT -le 1 ]; then + if [ $MDSCOUNT -le 1 ] || ! is_lustre ${parent}; then mkdir $path || error "mkdir '$path' failed" else local mdt_index @@ -8753,6 +9004,10 @@ test_mkdir() { mdt_index=$dirstripe_index fi + # randomly choose hash type + [ -z "$hash_type" ] && + hash_type=${hash_name[$((RANDOM % ${#hash_name[@]}))]} + if (($MDS1_VERSION >= $(version_code 2.8.0))); then if [ $dirstripe_count -eq -1 ]; then dirstripe_count=$((RANDOM % MDSCOUNT + 1)) @@ -8761,9 +9016,9 @@ test_mkdir() { dirstripe_count=1 fi - echo "striped dir -i$mdt_index -c$dirstripe_count $path" - $LFS mkdir -i$mdt_index -c$dirstripe_count $path || - error "mkdir -i $mdt_index -c$dirstripe_count $path failed" + echo "striped dir -i$mdt_index -c$dirstripe_count -H $hash_type $path" + $LFS mkdir -i$mdt_index -c$dirstripe_count -H $hash_type $path || + error "mkdir -i $mdt_index -c$dirstripe_count -H $hash_type $path failed" fi } @@ -8790,7 +9045,7 @@ check_mount_and_prep() is_mounted $MOUNT || setupall rm -rf $DIR/[df][0-9]* || error "Fail to cleanup the env!" - mkdir $DIR/$tdir || error "Fail to mkdir $DIR/$tdir." + mkdir_on_mdt0 $DIR/$tdir || error "Fail to mkdir $DIR/$tdir." 
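+        # clean out anything left under $MOUNT/.lustre/lost+found on each MDT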
for idx in $(seq $MDSCOUNT); do local name="MDT$(printf '%04x' $((idx - 1)))" rm -rf $MOUNT/.lustre/lost+found/$name/* @@ -8856,8 +9111,18 @@ pool_add_targets() { fi local t=$(for i in $list; do printf "$FSNAME-OST%04x_UUID " $i; done) + local tg=$(for i in $list; + do printf -- "-e $FSNAME-OST%04x_UUID " $i; done) + local firstx=$(printf "%04x" $first) + local lastx=$(printf "%04x" $last) + do_facet mgs $LCTL pool_add \ - $FSNAME.$pool $FSNAME-OST[$first-$last/$step] + $FSNAME.$pool $FSNAME-OST[$firstx-$lastx/$step] + # ignore EEXIST(17) + if (( $? != 0 && $? != 17 )); then + error_noexit "pool_add $FSNAME-OST[$firstx-$lastx/$step] failed" + return 3 + fi # wait for OSTs to be added to the pool for mds_id in $(seq $MDSCOUNT); do @@ -8865,23 +9130,16 @@ pool_add_targets() { local lodname=$FSNAME-MDT$(printf "%04x" $mdt_id)-mdtlov wait_update_facet mds$mds_id \ "lctl get_param -n lod.$lodname.pools.$pool | - sort -u | tr '\n' ' ' " "$t" || { + grep $tg | sort -u | tr '\n' ' '" "$t" || { error_noexit "mds$mds_id: Add to pool failed" - return 3 + return 2 } done - wait_update $HOSTNAME "lctl get_param -n lov.$FSNAME-*.pools.$pool \ - | sort -u | tr '\n' ' ' " "$t" || { + wait_update $HOSTNAME "lctl get_param -n lov.$FSNAME-*.pools.$pool | + grep $tg | sort -u | tr '\n' ' ' " "$t" || { error_noexit "Add to pool failed" return 1 } - local lfscount=$($LFS pool_list $FSNAME.$pool | grep -c "\-OST") - local addcount=$(((last - first) / step + 1)) - [ $lfscount -eq $addcount ] || { - error_noexit "lfs pool_list bad ost count" \ - "$lfscount != $addcount" - return 2 - } } pool_set_dir() { @@ -9001,10 +9259,21 @@ pool_file_rel_path() { pool_remove_first_target() { echo "Removing first target from a pool" + pool_remove_target $1 -1 +} + +pool_remove_target() { local pool=$1 + local index=$2 local pname="lov.$FSNAME-*.pools.$pool" - local t=$($LCTL get_param -n $pname | head -1) + if [ $index -eq -1 ]; then + local t=$($LCTL get_param -n $pname | head -1) + else + local t=$(printf "$FSNAME-OST%04x_UUID" $index) + fi + + echo "Removing $t from $pool" do_facet mgs $LCTL pool_remove $FSNAME.$pool $t for mds_id in $(seq $MDSCOUNT); do local mdt_id=$((mds_id-1)) @@ -9393,14 +9662,17 @@ check_clients_evicted() { local rc=0 for osc in $oscs; do - ((rc++)) echo "Check state for $osc" local evicted=$(do_facet client $LCTL get_param osc.$osc.state | - tail -n 3 | awk -F"[ [,]" \ - '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }') + tail -n 5 | awk -F"[ ,]" \ + '/EVICTED/ { if (mx<$4) { mx=$4; } } END { print mx }') if (($? == 0)) && (($evicted > $before)); then echo "$osc is evicted at $evicted" - ((rc--)) + else + ((rc++)) + echo "$osc was not evicted after $before:" + do_facet client $LCTL get_param osc.$osc.state | + tail -n 8 fi done @@ -9457,8 +9729,8 @@ changelog_register() { error "$mdt: changelog_mask=+hsm failed: $?" local cl_user - cl_user=$(do_facet $facet \ - $LCTL --device $mdt changelog_register -n) || + cl_user=$(do_facet $facet $LCTL --device $mdt \ + changelog_register -n $@) || error "$mdt: register changelog user failed: $?" 
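+        # deregister this changelog user automatically when the test exits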
stack_trap "__changelog_deregister $facet $cl_user" EXIT @@ -9545,6 +9817,7 @@ changelog_clear() { # so reorder to get same order than in changelog_register() local cl_facets=$(echo "${!CL_USERS[@]}" | tr " " "\n" | sort | tr "\n" " ") + local cl_user for facet in $cl_facets; do for cl_user in ${CL_USERS[$facet]}; do @@ -9556,12 +9829,24 @@ changelog_clear() { } changelog_dump() { + local rc + for M in $(seq $MDSCOUNT); do local facet=mds$M local mdt="$(facet_svc $facet)" - - $LFS changelog $mdt | sed -e 's/^/'$mdt'./' + local output + local ret + + output=$($LFS changelog $mdt) + ret=$? + if [ $ret -ne 0 ]; then + rc=${rc:-$ret} + elif [ -n "$output" ]; then + echo "$output" | sed -e 's/^/'$mdt'./' + fi done + + return ${rc:-0} } changelog_extract_field() { @@ -9910,11 +10195,16 @@ init_agt_vars() { export SINGLEAGT=${SINGLEAGT:-agt1} export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"} + export HSMTOOL_PID_FILE=${HSMTOOL_PID_FILE:-"/var/run/lhsmtool_posix.pid"} export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""} export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""} export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""} export HSMTOOL_TESTDIR - export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ") + export HSMTOOL_ARCHIVE_FORMAT=${HSMTOOL_ARCHIVE_FORMAT:-v2} + + if ! [[ $HSMTOOL =~ hsmtool ]]; then + echo "HSMTOOL = '$HSMTOOL' does not contain 'hsmtool', GLWT" >&2 + fi HSM_ARCHIVE_NUMBER=2 @@ -9948,44 +10238,26 @@ get_mdt_devices() { done } -search_copytools() { - local hosts=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $hosts "pgrep -x $HSMTOOL_BASE" +pkill_copytools() { + local hosts="$1" + local signal="$2" + + do_nodes "$hosts" "pkill --pidfile=$HSMTOOL_PID_FILE --signal=$signal hsmtool" } -kill_copytools() { - local hosts=${1:-$(facet_active_host $SINGLEAGT)} +copytool_continue() { + local agents=${1:-$(facet_active_host $SINGLEAGT)} - echo "Killing existing copytools on $hosts" - do_nodesv $hosts "killall -q $HSMTOOL_BASE" || true + pkill_copytools "$agents" CONT || return 0 + echo "Copytool is continued on $agents" } -wait_copytools() { +kill_copytools() { local hosts=${1:-$(facet_active_host $SINGLEAGT)} - local wait_timeout=200 - local wait_start=$SECONDS - local wait_end=$((wait_start + wait_timeout)) - local sleep_time=100000 # 0.1 second - - while ((SECONDS < wait_end)); do - if ! 
search_copytools $hosts; then - echo "copytools stopped in $((SECONDS - wait_start))s" - return 0 - fi - - echo "copytools still running on $hosts" - usleep $sleep_time - [ $sleep_time -lt 32000000 ] && # 3.2 seconds - sleep_time=$(bc <<< "$sleep_time * 2") - done - - # try to dump Copytool's stack - do_nodesv $hosts "echo 1 >/proc/sys/kernel/sysrq ; " \ - "echo t >/proc/sysrq-trigger" - echo "copytools failed to stop in ${wait_timeout}s" - - return 1 + echo "Killing existing copytools on $hosts" + pkill_copytools "$hosts" TERM || return 0 + copytool_continue "$hosts" } copytool_monitor_cleanup() { @@ -10020,28 +10292,29 @@ copytool_logfile() __lhsmtool_rebind() { - do_facet $facet $HSMTOOL -p "$hsm_root" --rebind "$@" "$mountpoint" + do_facet $facet $HSMTOOL "${hsmtool_options[@]}" --rebind "$@" "$mountpoint" } __lhsmtool_import() { mkdir -p "$(dirname "$2")" || error "cannot create directory '$(dirname "$2")'" - do_facet $facet $HSMTOOL -p "$hsm_root" --import "$@" "$mountpoint" + do_facet $facet $HSMTOOL "${hsmtool_options[@]}" --import "$@" "$mountpoint" } __lhsmtool_setup() { - local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root \"$hsm_root\"" + local host="$(facet_host "$facet")" + local cmd="$HSMTOOL ${hsmtool_options[@]} --daemon --pid-file=$HSMTOOL_PID_FILE" [ -n "$bandwidth" ] && cmd+=" --bandwidth $bandwidth" [ -n "$archive_id" ] && cmd+=" --archive $archive_id" - [ ${#misc_options[@]} -gt 0 ] && - cmd+=" $(IFS=" " echo "$@")" - cmd+=" \"$mountpoint\"" +# [ ${#misc_options[@]} -gt 0 ] && +# cmd+=" $(IFS=" " echo "$@")" + cmd+=" $@ \"$mountpoint\"" - echo "Starting copytool $facet on $(facet_host $facet)" - stack_trap "do_facet $facet libtool execute pkill -x '$HSMTOOL' || true" EXIT - do_facet $facet "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1" + echo "Starting copytool '$facet' on '$host' with cmdline '$cmd'" + stack_trap "pkill_copytools $host TERM || true" EXIT + do_node "$host" "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1" } hsm_root() { @@ -10074,7 +10347,17 @@ copytool() # Parse arguments local fail_on_error=true - local -a misc_options + local -a hsmtool_options=("--hsm-root=$hsm_root") + local -a action_options=() + + if [[ -n "$HSMTOOL_ARCHIVE_FORMAT" ]]; then + hsmtool_options+=("--archive-format=$HSMTOOL_ARCHIVE_FORMAT") + fi + + if [[ -n "$HSMTOOL_VERBOSE" ]]; then + hsmtool_options+=("$HSMTOOL_VERBOSE") + fi + while [ $# -gt 0 ]; do case "$1" in -f|--facet) @@ -10102,7 +10385,7 @@ copytool() ;; *) # Uncommon(/copytool dependent) option - misc_options+=("$1") + action_options+=("$1") ;; esac shift @@ -10118,7 +10401,7 @@ copytool() ;; esac - __${copytool}_${action} "${misc_options[@]}" + __${copytool}_${action} "${action_options[@]}" if [ $? 
-ne 0 ]; then local error_msg @@ -10128,8 +10411,8 @@ copytool() error_msg="Failed to start copytool $facet on '$host'" ;; import) - local src="${misc_options[0]}" - local dest="${misc_options[1]}" + local src="${action_options[0]}" + local dest="${action_options[1]}" error_msg="Failed to import '$src' to '$dest'" ;; rebind) @@ -10202,20 +10485,15 @@ mdts_set_param() { return $rc } -wait_result() { - local facet=$1 - shift - wait_update --verbose $(facet_active_host $facet) "$@" -} - mdts_check_param() { local key="$1" local target="$2" local timeout="$3" local mdtno + for mdtno in $(seq 1 $MDSCOUNT); do local idx=$(($mdtno - 1)) - wait_result mds${mdtno} \ + wait_update_facet --verbose mds${mdtno} \ "$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \ $timeout || error "$key state is not '$target' on mds${mdtno}" @@ -10266,7 +10544,7 @@ wait_request_state() { local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions" cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d=" - wait_result $mds "$cmd" "$state" 200 || + wait_update_facet --verbose $mds "$cmd" "$state" 200 || error "request on $fid is not $state on $mds" } @@ -10364,3 +10642,256 @@ sel_layout_sanity() { check_component_count $file $comp_cnt } +statx_supported() { + $STATX --quiet --version + return $? +} + +# +# wrappers for createmany and unlinkmany +# to set debug=0 if number of creates is high enough +# this is to speedup testing +# +function createmany() { + local count=${!#} + local rc + + if (( count > 100 )); then + debugsave + do_nodes $(comma_list $(all_nodes)) $LCTL set_param -n debug=0 + fi + $LUSTRE/tests/createmany $* + rc=$? + debugrestore + + return $rc +} + +function unlinkmany() { + local count=${!#} + local rc + + if (( count > 100 )); then + debugsave + do_nodes $(comma_list $(all_nodes)) $LCTL set_param -n debug=0 + fi + $LUSTRE/tests/unlinkmany $* + rc=$? 
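+        # put the debug mask back if it was reduced above for a large count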
+        debugrestore
+
+        return $rc
+}
+
+# Check if fallocate is supported on OSTs; enable it if unset (default mode=0).
+# Optionally pass the OST fallocate mode (0=unwritten extents, 1=zero extents).
+function check_set_fallocate()
+{
+        local new_mode="$1"
+        local osts=$(comma_list $(osts_nodes))
+        local fa_mode="osd-ldiskfs.*.fallocate_zero_blocks"
+        local old_mode=$(do_facet ost1 $LCTL get_param -n $fa_mode 2>/dev/null |
+                         head -n 1)
+
+        [[ -n "$old_mode" ]] || { echo "fallocate not supported"; return 1; }
+        [[ -z "$new_mode" && "$old_mode" != "-1" ]] &&
+                { echo "keep default fallocate mode: $old_mode"; return 0; }
+        [[ "$new_mode" && "$old_mode" == "$new_mode" ]] &&
+                { echo "keep current fallocate mode: $old_mode"; return 0; }
+
+        stack_trap "do_nodes $osts $LCTL set_param $fa_mode=$old_mode"
+        do_nodes $osts $LCTL set_param $fa_mode=${new_mode:-0} ||
+                error "set $fa_mode=$new_mode"
+}
+
+# Check if fallocate is supported on OSTs; enable it if unset, skip otherwise
+function check_set_fallocate_or_skip()
+{
+        [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
+        check_set_fallocate || skip "need at least 2.13.57 for fallocate"
+}
+
+function disable_opencache()
+{
+        local state=$($LCTL get_param -n "llite.*.opencache_threshold_count" | head -1)
+
+        test -z "${saved_OPENCACHE_value}" &&
+                export saved_OPENCACHE_value="$state"
+
+        [[ "$state" = "off" ]] && return
+
+        $LCTL set_param -n "llite.*.opencache_threshold_count"=off
+}
+
+function set_opencache()
+{
+        local newvalue="$1"
+        local state=$($LCTL get_param -n "llite.*.opencache_threshold_count")
+
+        [[ -n "$newvalue" ]] || return
+
+        [[ -n "${saved_OPENCACHE_value}" ]] ||
+                export saved_OPENCACHE_value="$state"
+
+        $LCTL set_param -n "llite.*.opencache_threshold_count"=$newvalue
+}
+
+function restore_opencache()
+{
+        [[ -z "${saved_OPENCACHE_value}" ]] ||
+                $LCTL set_param -n "llite.*.opencache_threshold_count"=${saved_OPENCACHE_value}
+}
+
+# LU-13417: XXX lots of tests assume the test directory will be created on
+# MDT0; use this function to create a directory on a specific MDT explicitly,
+# and set the default LMV so that subdirectories are created on the same MDT.
+mkdir_on_mdt() {
+        local mdt
+        local OPTIND=1
+
+        while getopts "i:" opt $*; do
+                case $opt in
+                        i) mdt=$OPTARG;;
+                esac
+        done
+
+        shift $((OPTIND - 1))
+
+        $LFS mkdir -i $mdt -c 1 $*
+}
+
+mkdir_on_mdt0() {
+        mkdir_on_mdt -i0 $*
+}
+
+# Wait for nodemap synchronization
+wait_nm_sync() {
+        local nodemap_name=$1
+        local key=$2
+        local value=$3
+        local opt=$4
+        local proc_param
+        local is_active=$(do_facet mgs $LCTL get_param -n nodemap.active)
+        local max_retries=20
+        local is_sync
+        local out1=""
+        local out2
+        local mgs_ip=$(host_nids_address $mgs_HOST $NETTYPE | cut -d' ' -f1)
+        local i
+
+        if [ "$nodemap_name" == "active" ]; then
+                proc_param="active"
+        elif [ -z "$key" ]; then
+                proc_param=${nodemap_name}
+        else
+                proc_param="${nodemap_name}.${key}"
+        fi
+        if [ "$opt" == "inactive" ]; then
+                # check nm sync even if nodemap is not activated
+                is_active=1
+                opt=""
+        fi
+        (( is_active == 0 )) && [ "$proc_param" != "active" ] && return
+
+        if [ -z "$value" ]; then
+                out1=$(do_facet mgs $LCTL get_param $opt \
+                        nodemap.${proc_param} 2>/dev/null)
+                echo "On MGS ${mgs_ip}, ${proc_param} = $out1"
+        else
+                out1=$value;
+        fi
+
+        # if servers run on the same node, it is impossible to tell if they get
+        # synced with the mgs, so just wait an arbitrary 10 seconds
+        if [ $(facet_active_host mgs) == $(facet_active_host mds) ] &&
+           [ $(facet_active_host mgs) == $(facet_active_host ost1) ]; then
+                echo "waiting 10 secs for sync"
+                sleep 10
+                return
+        fi
+
+        # wait up to 10 seconds for other servers to sync with mgs
+        for i in $(seq 1 10); do
+                for node in $(all_server_nodes); do
+                        local node_ip=$(host_nids_address $node $NETTYPE |
+                                        cut -d' ' -f1)
+
+                        is_sync=true
+                        if [ -z "$value" ]; then
+                                [ $node_ip == $mgs_ip ] && continue
+                        fi
+
+                        out2=$(do_node $node_ip $LCTL get_param $opt \
+                                nodemap.$proc_param 2>/dev/null)
+                        echo "On $node ${node_ip}, ${proc_param} = $out2"
+                        [ "$out1" != "$out2" ] && is_sync=false && break
+                done
+                $is_sync && break
+                sleep 1
+        done
+        if ! $is_sync; then
+                echo MGS
+                echo $out1
+                echo OTHER - IP: $node_ip
+                echo $out2
+                error "mgs and $nodemap_name ${key} mismatch, $i attempts"
+        fi
+        echo "waited $((i - 1)) seconds for sync"
+}
+
+consume_precreations() {
+        local dir=$1
+        local mfacet=$2
+        local OSTIDX=$3
+        local extra=${4:-2}
+        local OST=$(ostname_from_index $OSTIDX $dir)
+
+        test_mkdir -p $dir/${OST}
+        $LFS setstripe -i $OSTIDX -c 1 ${dir}/${OST}
+
+        # on the mdt's osc
+        local mdtosc_proc=$(get_mdtosc_proc_path $mfacet $OST)
+        local last_id=$(do_facet $mfacet $LCTL get_param -n \
+                        osp.$mdtosc_proc.prealloc_last_id)
+        local next_id=$(do_facet $mfacet $LCTL get_param -n \
+                        osp.$mdtosc_proc.prealloc_next_id)
+        echo "Creating to objid $last_id on ost $OST..."
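+        # consume every precreated object from next_id up to last_id, plus
+        # $extra more, so the MDT must request a new precreation batch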
+        createmany -o $dir/${OST}/f $next_id $((last_id - next_id + extra))
+}
+
+__exhaust_precreations() {
+        local OSTIDX=$1
+        local FAILLOC=$2
+        local FAILIDX=${3:-$OSTIDX}
+        local ofacet=ost$((OSTIDX + 1))
+
+        mkdir_on_mdt0 $DIR/$tdir
+        local mdtidx=$($LFS getstripe -m $DIR/$tdir)
+        local mfacet=mds$((mdtidx + 1))
+        echo OSTIDX=$OSTIDX MDTIDX=$mdtidx
+
+        local mdtosc_proc=$(get_mdtosc_proc_path $mfacet)
+        do_facet $mfacet $LCTL get_param osp.$mdtosc_proc.prealloc*
+
+#define OBD_FAIL_OST_ENOSPC 0x215
+        do_facet $ofacet $LCTL set_param fail_val=$FAILIDX fail_loc=0x215
+
+        consume_precreations $DIR/$tdir $mfacet $OSTIDX
+
+        do_facet $mfacet $LCTL get_param osp.$mdtosc_proc.prealloc*
+        do_facet $ofacet $LCTL set_param fail_loc=$FAILLOC
+}
+
+exhaust_precreations() {
+        __exhaust_precreations $1 $2 $3
+        sleep_maxage
+}
+
+exhaust_all_precreations() {
+        local i
+        for (( i=0; i < OSTCOUNT; i++ )) ; do
+                __exhaust_precreations $i $1 -1
+        done
+        sleep_maxage
+}
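The precreation helpers above are normally driven from individual test cases rather than from the framework itself. The sketch below shows one way a test might use them, assuming the usual sanity-style conventions already provided by this framework (run_test, do_facet, $LCTL, sleep_maxage, $DIR/$tdir); the test name, description string, and cleanup step are illustrative assumptions, not part of this patch.

test_sketch_enospc() {
        local ostidx=0

        # fill all precreated objects on OST index 0 and leave
        # OBD_FAIL_OST_ENOSPC (0x215) armed so new allocations there fail
        exhaust_precreations $ostidx 0x215

        # ... test-specific checks against $DIR/$tdir go here ...

        # disarm the failure injection and let precreation resume
        do_facet ost$((ostidx + 1)) $LCTL set_param fail_loc=0
        sleep_maxage
}
run_test sketch_enospc "exhaust precreations on one OST (usage sketch)"

Passing the same fail_loc value (0x215) as the FAILLOC argument keeps the OST returning ENOSPC after the objects are consumed, which is why the test must reset fail_loc itself once it is done.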