X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Ftest-framework.sh;h=636a7ca60a7d0278820cd527b83955da5ded5d43;hp=6566c045f17c1309368ec10bdb2e4eb457492ae3;hb=56f69854dae234fa974ab4f1dff909aea601a592;hpb=04b2da6180d3c8eda21f7ab36c676462be041b74 diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 6566c04..636a7ca 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -299,8 +299,11 @@ init_test_env() { [ ! -f "$LCTL" ] && export LCTL=$(which lctl) export LFS=${LFS:-"$LUSTRE/utils/lfs"} [ ! -f "$LFS" ] && export LFS=$(which lfs) + export KSOCKLND_CONFIG=${KSOCKLND_CONFIG:-"$LUSTRE/scripts/ksocklnd-config"} + [ ! -f "$KSOCKLND_CONFIG" ] && + export KSOCKLND_CONFIG=$(which ksocklnd-config 2> /dev/null) - export PERM_CMD=${PERM_CMD:-"$LCTL conf_param"} + export PERM_CMD=$(echo ${PERM_CMD:-"$LCTL conf_param"}) export L_GETIDENTITY=${L_GETIDENTITY:-"$LUSTRE/utils/l_getidentity"} if [ ! -f "$L_GETIDENTITY" ]; then @@ -538,6 +541,17 @@ module_loaded () { /sbin/lsmod | grep -q "^\<$1\>" } +check_lfs_df_ret_val() { + # Ignore only EOPNOTSUPP (which is 95; Operation not supported) error + # returned by 'lfs df' for valid dentry but not a lustrefs. + # + # 'lfs df' historically always returned success(0) instead of + # EOPNOTSUPP. This function for compatibility reason, ignores and + # masquerades EOPNOTSUPP as success. + [[ $1 -eq 95 ]] && return 0 + return $1 +} + PRLFS=false lustre_insmod() { local module=$1 @@ -682,9 +696,9 @@ load_modules_local() { # that obviously has nothing to do with this Lustre run # Disable automatic memory scanning to avoid perf hit. 
if [ -f /sys/kernel/debug/kmemleak ] ; then - echo scan=off > /sys/kernel/debug/kmemleak - echo scan > /sys/kernel/debug/kmemleak - echo clear > /sys/kernel/debug/kmemleak + echo scan=off > /sys/kernel/debug/kmemleak || true + echo scan > /sys/kernel/debug/kmemleak || true + echo clear > /sys/kernel/debug/kmemleak || true fi echo Loading modules from $LUSTRE @@ -739,6 +753,7 @@ load_modules_local() { fi load_module ../lnet/klnds/$LNETLND load_module obdclass/obdclass + MODOPTS_PTLRPC=${MODOPTS_PTLRPC:-"lbug_on_grant_miscount=1"} load_module ptlrpc/ptlrpc load_module ptlrpc/gss/ptlrpc_gss load_module fld/fld @@ -802,12 +817,24 @@ load_modules_local() { } load_modules () { + local facets + local facet + local failover load_modules_local # bug 19124 # load modules on remote nodes optionally # lustre-tests have to be installed on these nodes if $LOAD_MODULES_REMOTE; then local list=$(comma_list $(remote_nodes_list)) + + # include failover nodes in case they are not in the list yet + facets=$(get_facets) + for facet in ${facets//,/ }; do + failover=$(facet_failover_host $facet) + [ -n "$list" ] && [[ ! 
"$list" =~ "$failover" ]] && + list="$list,$failover" + done + if [ -n "$list" ]; then echo "loading modules on: '$list'" do_rpc_nodes "$list" load_modules_local @@ -873,7 +900,7 @@ fs_log_size() { local size=0 case $(facet_fstype $facet) in - ldiskfs) size=50;; # largest seen is 44, leave some headroom + ldiskfs) size=72;; # largest seen is 64, leave some headroom # grant_block_size is in bytes, allow at least 2x max blocksize zfs) size=$(lctl get_param osc.$FSNAME*.import | awk '/grant_block_size:/ {print $2/512; exit;}') @@ -1555,8 +1582,7 @@ get_osd_param() { local device=${2:-$FSNAME-OST*} local name=$3 - do_nodes $nodes "$LCTL get_param -n obdfilter.$device.$name \ - osd-*.$device.$name 2>&1" | grep -v 'error:' + do_nodes $nodes "$LCTL get_param -n osd-*.$device.$name" } set_osd_param() { @@ -1565,8 +1591,7 @@ set_osd_param() { local name=$3 local value=$4 - do_nodes $nodes "$LCTL set_param -n obdfilter.$device.$name=$value \ - osd-*.$device.$name=$value 2>&1" | grep -v 'error:' + do_nodes $nodes "$LCTL set_param -n osd-*.$device.$name=$value" } set_debug_size () { @@ -2035,6 +2060,9 @@ mount_facet() { local devicelabel local dm_dev=${!dev} + [[ $dev == "mgsfailover_dev" ]] && combined_mgs_mds && + dev=mds1failover_dev + module_loaded lustre || load_modules case $fstype in @@ -2127,11 +2155,17 @@ start() { eval export ${dev_alias}_dev=${device} eval export ${facet}_opt=\"$@\" + combined_mgs_mds && [[ ${dev_alias} == mds1 ]] && + eval export mgs_dev=${device} + local varname=${dev_alias}failover_dev if [ -n "${!varname}" ] ; then eval export ${dev_alias}failover_dev=${!varname} else eval export ${dev_alias}failover_dev=$device + combined_mgs_mds && [[ ${dev_alias} == mds1 ]] && + eval export mgsfailover_dev=${device} + fi local mntpt=$(facet_mntpt $facet) @@ -2194,7 +2228,7 @@ restore_quota() { if [ "$old_MDT_QUOTA_TYPE" ]; then if [[ $PERM_CMD == *"set_param -P"* ]]; then do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-MDT*.quota_slave.enable = \ + 
osd-*.$FSNAME-MDT*.quota_slave.enabled = \ $old_MDT_QUOTA_TYPE else do_facet mgs $PERM_CMD \ @@ -2204,7 +2238,7 @@ restore_quota() { if [ "$old_OST_QUOTA_TYPE" ]; then if [[ $PERM_CMD == *"set_param -P"* ]]; then do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-OST*.quota_slave.enable = \ + osd-*.$FSNAME-OST*.quota_slave.enabled = \ $old_OST_QUOTA_TYPE else do_facet mgs $LCTL conf_param \ @@ -2218,6 +2252,7 @@ restore_quota() { # This will allow fixing the "lfs df" summary line in the future. lfs_df() { $LFS df $* | sed -e 's/filesystem /filesystem_/' + check_lfs_df_ret_val $? } # Get free inodes on the MDT specified by mdt index, free indoes on @@ -2265,9 +2300,9 @@ setup_quota(){ if [[ $PERM_CMD == *"set_param -P"* ]]; then do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-MDT*.quota_slave.enable=$QUOTA_TYPE + osd-*.$FSNAME-MDT*.quota_slave.enabled=$QUOTA_TYPE do_facet mgs $PERM_CMD \ - osd-*.$FSNAME-OST*.quota_slave.enable=$QUOTA_TYPE + osd-*.$FSNAME-OST*.quota_slave.enabled=$QUOTA_TYPE else do_facet mgs $PERM_CMD $FSNAME.quota.mdt=$QUOTA_TYPE || error "set mdt quota type failed" @@ -2715,11 +2750,12 @@ remount_facet() { reboot_facet() { local facet=$1 local node=$(facet_active_host $facet) + local sleep_time=${2:-10} if [ "$FAILURE_MODE" = HARD ]; then boot_node $node else - sleep 10 + sleep $sleep_time fi } @@ -3001,15 +3037,21 @@ stop_client_loads() { # If --verbose is passed as the first argument, the result is printed on each # value change, otherwise it is only printed after every 10s interval. # +# If --quiet is passed as the first/second argument, the do_node() command +# will not print the remote command before executing it each time. +# # Using wait_update_cond() or related helper function is preferable to adding # a "long enough" wait for some state to change in the background, since # "long enough" may be too short due to tunables, system config, or running in # a VM, and must by necessity wait too long for most cases or risk failure. 
# -# usage: wait_update_cond [--verbose] node check cond expect [max_wait] +# usage: wait_update_cond [--verbose] [--quiet] node check cond expect [max_wait] wait_update_cond() { - local verbose=false - [[ "$1" == "--verbose" ]] && verbose=true && shift + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift local node=$1 local check="$2" @@ -3024,7 +3066,7 @@ wait_update_cond() { local print=10 while (( $waited <= $max_wait )); do - result=$(do_node $node "$check") + result=$(do_node $quiet $node "$check") eval [[ "'$result'" $cond "'$expect'" ]] if [[ $? == 0 ]]; then @@ -3032,7 +3074,7 @@ wait_update_cond() { echo "Updated after ${waited}s: want '$expect' got '$result'" return 0 fi - if $verbose && [[ "$result" != "$prev_result" ]]; then + if [[ -n "$verbose" && "$result" != "$prev_result" ]]; then [[ -n "$prev_result" ]] && echo "Changed after ${waited}s: from '$prev_result' to '$result'" prev_result="$result" @@ -3046,23 +3088,29 @@ wait_update_cond() { return 3 } -# usage: wait_update [--verbose] node check expect [max_wait] +# usage: wait_update [--verbose] [--quiet] node check expect [max_wait] wait_update() { - local verbose= - [ "$1" = "--verbose" ] && verbose="$1" && shift + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift local node="$1" local check="$2" local expect="$3" local max_wait=$4 - wait_update_cond $verbose $node "$check" "==" "$expect" $max_wait + wait_update_cond $verbose $quiet $node "$check" "==" "$expect" $max_wait } # usage: wait_update_facet_cond [--verbose] facet check cond expect [max_wait] wait_update_facet_cond() { - local verbose= - [ "$1" = "--verbose" ] && verbose="$1" && shift + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift local 
node=$(facet_active_host $1) local check="$2" @@ -3070,20 +3118,23 @@ wait_update_facet_cond() { local expect="$4" local max_wait=$5 - wait_update_cond $verbose $node "$check" "$cond" "$expect" $max_wait + wait_update_cond $verbose $quiet $node "$check" "$cond" "$expect" $max_wait } # usage: wait_update_facet [--verbose] facet check expect [max_wait] wait_update_facet() { - local verbose= - [ "$1" = "--verbose" ] && verbose="$1" && shift + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift local node=$(facet_active_host $1) local check="$2" local expect="$3" local max_wait=$4 - wait_update_cond $verbose $node "$check" "==" "$expect" $max_wait + wait_update_cond $verbose $quiet $node "$check" "==" "$expect" $max_wait } sync_all_data() { @@ -3347,8 +3398,11 @@ wait_recovery_complete () { fi echo affected facets: $facets - # we can use "for" here because we are waiting the slowest - for facet in ${facets//,/ }; do + facets=${facets//,/ } + # We can use "for" here because we are waiting the slowest. + # The mgs not having the recovery_status proc entry, exclude it + # from the facet list. + for facet in ${facets//mgs/ }; do local var_svc=${facet}_svc local param="*.${!var_svc}.recovery_status" @@ -3423,7 +3477,7 @@ wait_destroy_complete () { # MAX value shouldn't be big as this mean server responsiveness # never increase this just to make test pass but investigate # why it takes so long time - local MAX=5 + local MAX=${1:-5} local WAIT=0 local list=$(comma_list $(mdts_nodes)) while [ $WAIT -lt $MAX ]; do @@ -3507,12 +3561,20 @@ wait_remote_prog () { lfs_df_check() { local clients=${1:-$CLIENTS} + local rc if [ -z "$clients" ]; then - $LFS df $MOUNT + $LFS df $MOUNT > /dev/null + rc=$? else $PDSH $clients "$LFS df $MOUNT" > /dev/null + rc=$? fi + + check_lfs_df_ret_val $rc + rc=$? 
+ + return $rc } clients_up() { @@ -3521,6 +3583,19 @@ clients_up() { lfs_df_check } +all_mds_up() { + (( MDSCOUNT == 1 )) && return + + # wait so that statfs data on MDT expire + local delay=$(do_facet $SINGLEMDS lctl \ + get_param -n osp.*MDT0000*MDT0001.maxage) + sleep $delay + local nodes=$(comma_list $(mdts_nodes)) + # initiate statfs RPC, all to all MDTs + do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null + do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null +} + client_up() { # usually checked on particular client or locally sleep 1 @@ -3613,7 +3688,7 @@ facet_failover() { facet=$(echo ${affecteds[index]} | tr -s " " | cut -d"," -f 1) echo reboot facets: ${affecteds[index]} - reboot_facet $facet + reboot_facet $facet $sleep_time change_active ${affecteds[index]} @@ -3626,11 +3701,14 @@ facet_failover() { if ! combined_mgs_mds && list_member ${affecteds[index]} mgs; then mount_facet mgs || error "Restart of mgs failed" + affecteds[index]=$(exclude_items_from_list \ + ${affecteds[index]} mgs) fi # FIXME; has to be changed to mount all facets concurrently - affected=$(exclude_items_from_list ${affecteds[index]} mgs) - echo mount facets: ${affecteds[index]} - mount_facets ${affecteds[index]} + if [ -n "${affecteds[index]}" ]; then + echo mount facets: ${affecteds[index]} + mount_facets ${affecteds[index]} + fi if $GSS_SK; then do_nodes $(comma_list $(all_nodes)) \ "keyctl show | grep lustre | cut -c1-11 | @@ -3754,6 +3832,7 @@ fail_abort() { mount_facet $facet -o $abort_type clients_up || echo "first stat failed: $?" clients_up || error "post-failover stat: $?" 
+ all_mds_up } host_nids_address() { @@ -3913,7 +3992,14 @@ facet_host() { elif [ "${facet:0:3}" == "mdt" -o \ "${facet:0:3}" == "mds" -o \ "${facet:0:3}" == "mgs" ]; then - eval export ${facet}_HOST=${mds_HOST} + local temp + if [ "${facet}" == "mgsfailover" ] && + [ -n "$mds1failover_HOST" ]; then + temp=$mds1failover_HOST + else + temp=${mds_HOST} + fi + eval export ${facet}_HOST=$temp fi fi echo -n ${!varname} @@ -3929,6 +4015,13 @@ facet_failover_host() { return fi + if combined_mgs_mds && [ $facet == "mgs" ] && + [ -z $mds1failover_HOST ]; then + temp=mds1failover_HOST + echo ${!temp} + return + fi + if [ "${facet:0:3}" == "mdt" -o "${facet:0:3}" == "mds" -o \ "${facet:0:3}" == "mgs" ]; then @@ -3986,8 +4079,6 @@ change_active() { local facetlist=$1 local facet - facetlist=$(exclude_items_from_list $facetlist mgs) - for facet in ${facetlist//,/ }; do local failover=${facet}failover local host=`facet_host $failover` @@ -4010,26 +4101,27 @@ change_active() { } do_node() { - local verbose=false - # do not stripe off hostname if verbose, bug 19215 - if [ x$1 = x--verbose ]; then - shift - verbose=true - fi + local verbose + local quiet - local HOST=$1 - shift - local myPDSH=$PDSH - if [ "$HOST" = "$HOSTNAME" ]; then - myPDSH="no_dsh" - elif [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" ]; then - echo "cannot run remote command on $HOST with $myPDSH" - return 128 - fi - if $VERBOSE; then - echo "CMD: $HOST $@" >&2 - $myPDSH $HOST "$LCTL mark \"$@\"" > /dev/null 2>&1 || : - fi + # do not strip off hostname if verbose, b=19215 + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + + local HOST=$1 + shift + local myPDSH=$PDSH + + if [ "$HOST" = "$HOSTNAME" ]; then + myPDSH="no_dsh" + elif [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" ]; then + echo "cannot run remote command on $HOST with $myPDSH" + return 128 + fi + if $VERBOSE && [[ -z "$quiet" ]]; then + echo "CMD: $HOST $@" >&2 + $myPDSH $HOST "$LCTL mark \"$@\"" 
> /dev/null 2>&1 || : + fi if [[ "$myPDSH" == "rsh" ]] || [[ "$myPDSH" == *pdsh* && "$myPDSH" != *-S* ]]; then @@ -4040,31 +4132,35 @@ do_node() { eval $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests; PATH=\$PATH:/sbin:/usr/sbin; cd $RPWD; - LUSTRE=\"$RLUSTRE\" sh -c \"$@\") || + LUSTRE=\"$RLUSTRE\" bash -c \"$@\") || echo command failed >$command_status" [[ -n "$($myPDSH $HOST cat $command_status)" ]] && return 1 || return 0 fi - if $verbose ; then - # print HOSTNAME for myPDSH="no_dsh" - if [[ $myPDSH = no_dsh ]]; then - $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" | sed -e "s/^/${HOSTNAME}: /" - else - $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" - fi - else - $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" | sed "s/^${HOST}: //" - fi - return ${PIPESTATUS[0]} -} - -do_nodev() { - do_node --verbose "$@" + if [[ -n "$verbose" ]]; then + # print HOSTNAME for myPDSH="no_dsh" + if [[ $myPDSH = no_dsh ]]; then + $myPDSH $HOST \ + "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\ + cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$@\")" | + sed -e "s/^/${HOSTNAME}: /" + else + $myPDSH $HOST \ + "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\ + cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$@\")" + fi + else + $myPDSH $HOST \ + "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\ + cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$@\")" | + sed "s/^${HOST}: //" + fi + return ${PIPESTATUS[0]} } single_local_node () { - [ "$1" = "$HOSTNAME" ] + [ "$1" = "$HOSTNAME" ] } # Outputs environment variable assignments that should be passed to remote nodes @@ -4110,45 +4206,42 @@ get_env_vars() { } do_nodes() { - local verbose=false - # do not stripe off hostname if verbose, bug 19215 - if [ x$1 = x--verbose ]; then - shift - verbose=true - 
fi + local verbose + local quiet - local rnodes=$1 - shift + # do not strip off hostname if verbose, b=19215 + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift - if single_local_node $rnodes; then - if $verbose; then - do_nodev $rnodes "$@" - else - do_node $rnodes "$@" - fi - return $? - fi + local rnodes=$1 + shift - # This is part from do_node - local myPDSH=$PDSH + if single_local_node $rnodes; then + do_node $verbose $quiet $rnodes "$@" + return $? + fi - [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" -o "$myPDSH" = "rsh" ] && \ - echo "cannot run remote command on $rnodes with $myPDSH" && return 128 + # This is part from do_node + local myPDSH=$PDSH - export FANOUT=$(get_node_count "${rnodes//,/ }") - if $VERBOSE; then - echo "CMD: $rnodes $@" >&2 - $myPDSH $rnodes "$LCTL mark \"$@\"" > /dev/null 2>&1 || : - fi + [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" -o "$myPDSH" = "rsh" ] && + echo "cannot run remote command on $rnodes with $myPDSH" && + return 128 - # do not replace anything from pdsh output if -N is used - # -N Disable hostname: prefix on lines of output. - if $verbose || [[ $myPDSH = *-N* ]]; then - $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")" - else - $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")" | sed -re "s/^[^:]*: //g" - fi - return ${PIPESTATUS[0]} + export FANOUT=$(get_node_count "${rnodes//,/ }") + if $VERBOSE && [[ -z "$quiet" ]]; then + echo "CMD: $rnodes $@" >&2 + $myPDSH $rnodes "$LCTL mark \"$@\"" > /dev/null 2>&1 || : + fi + + # do not replace anything from pdsh output if -N is used + # -N Disable hostname: prefix on lines of output. 
+ if [[ -n "$verbose" || $myPDSH = *-N* ]]; then + $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) bash -c \"$@\")" + else + $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) bash -c \"$@\")" | sed -re "s/^[^:]*: //g" + fi + return ${PIPESTATUS[0]} } ## @@ -4159,11 +4252,18 @@ do_nodes() { # # usage: do_facet $facet command [arg ...] do_facet() { + local verbose + local quiet + + [[ "$1" == "--verbose" ]] && verbose="$1" && shift + [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift + local facet=$1 shift - local HOST=$(facet_active_host $facet) - [ -z $HOST ] && echo "No host defined for facet ${facet}" && exit 1 - do_node $HOST "$@" + local host=$(facet_active_host $facet) + + [ -z "$host" ] && echo "No host defined for facet ${facet}" && exit 1 + do_node $verbose $quiet $host "$@" } # Function: do_facet_random_file $FACET $FILE $SIZE @@ -4186,7 +4286,7 @@ do_facet_create_file() { } do_nodesv() { - do_nodes --verbose "$@" + do_nodes --verbose "$@" } add() { @@ -5103,7 +5203,14 @@ init_facet_vars () { local varname=${facet}failover_HOST if [ -z "${!varname}" ]; then - eval export $varname=$(facet_host $facet) + local temp + if combined_mgs_mds && [ $facet == "mgs" ] && + [ -n "$mds1failover_HOST" ]; then + temp=$mds1failover_HOST + else + temp=$(facet_host $facet) + fi + eval export $varname=$temp fi varname=${facet}_HOST @@ -5136,12 +5243,12 @@ init_facets_vars () { if ! remote_mds_nodsh; then for num in $(seq $MDSCOUNT); do - DEVNAME=`mdsdevname $num` + DEVNAME=$(mdsdevname $num) init_facet_vars mds$num $DEVNAME $MDS_MOUNT_OPTS done fi - combined_mgs_mds || init_facet_vars mgs $(mgsdevname) $MGS_MOUNT_OPTS + init_facet_vars mgs $(mgsdevname) $MGS_MOUNT_OPTS if ! 
remote_ost_nodsh; then for num in $(seq $OSTCOUNT); do @@ -5271,8 +5378,9 @@ init_param_vars () { log "Using TIMEOUT=$TIMEOUT" # tune down to speed up testing on (usually) small setups + local mgc_timeout=/sys/module/mgc/parameters/mgc_requeue_timeout_min do_nodes $(comma_list $(nodes_list)) \ - "echo 1 >/sys/module/mgc/parameters/mgc_requeue_timeout_min" + "[ -f $mgc_timeout ] && echo 1 > $mgc_timeout; exit 0" osc_ensure_active $SINGLEMDS $TIMEOUT osc_ensure_active client $TIMEOUT @@ -5304,8 +5412,7 @@ init_param_vars () { fi (( MDS1_VERSION <= $(version_code 2.13.52) )) || - do_nodes $(comma_list $(mdts_nodes)) \ - "$LCTL set_param lod.*.mdt_hash=crush" + do_facet mgs "$LCTL set_param -P lod.*.mdt_hash=crush" return 0 } @@ -5433,6 +5540,65 @@ create_pools () { done } +set_pools_quota () { + local u + local o + local p + local i + local j + + [[ $ENABLE_QUOTA ]] || error "Required Pool Quotas: \ + $POOLS_QUOTA_USERS_SET, but ENABLE_QUOTA not set!" + + # POOLS_QUOTA_USERS_SET= + # "quota15_1:20M -- for all of the found pools + # quota15_2:1G:gpool0 + # quota15_3 -- for global limit only + # quota15_4:200M:gpool0 + # quota15_4:200M:gpool1" + + declare -a pq_userset=(${POOLS_QUOTA_USERS_SET="mpiuser"}) + declare -a pq_users + declare -A pq_limits + + for ((i=0; i<${#pq_userset[@]}; i++)); do + u=${pq_userset[i]%%:*} + o="" + # user gets no pool limits if + # POOLS_QUOTA_USERS_SET does not specify it + [[ ${pq_userset[i]} =~ : ]] && o=${pq_userset[i]##$u:} + pq_limits[$u]+=" $o" + done + pq_users=(${!pq_limits[@]}) + + declare -a opts + local pool + + for ((i=0; i<${#pq_users[@]}; i++)); do + u=${pq_users[i]} + # set to max limit (_u64) + $LFS setquota -u $u -B $((2**24 - 1))T $DIR + opts=(${pq_limits[$u]}) + for ((j=0; j<${#opts[@]}; j++)); do + p=${opts[j]##*:} + o=${opts[j]%%:*} + # Set limit for all existing pools if + # no pool specified + if [ $p == $o ]; then + p=$(list_pool $FSNAME | sed "s/$FSNAME.//") + echo "No pool specified for $u, + set limit $o for all 
existing pools" + fi + for pool in $p; do + $LFS setquota -u $u -B $o --pool $pool $DIR || + error "setquota -u $u -B $o \ + --pool $pool failed" + done + done + $LFS quota -uv $u --pool $DIR + done +} + check_and_setup_lustre() { sanitize_parameters nfs_client_mode && return @@ -5525,6 +5691,9 @@ check_and_setup_lustre() { create_pools $FS_POOL $FS_POOL_NOSTS fi + if [[ -n "$POOLS_QUOTA_USERS_SET" ]]; then + set_pools_quota + fi if [ "$ONLY" == "setup" ]; then exit 0 fi @@ -5726,30 +5895,30 @@ check_and_cleanup_lustre() { # General functions wait_for_function () { - local quiet="" + local quiet="" - # suppress fn both stderr and stdout - if [ "$1" = "--quiet" ]; then - shift - quiet=" > /dev/null 2>&1" - - fi + # suppress fn both stderr and stdout + if [ "$1" = "--quiet" ]; then + shift + quiet=" > /dev/null 2>&1" + fi - local fn=$1 - local max=${2:-900} - local sleep=${3:-5} + local fn=$1 + local max=${2:-900} + local sleep=${3:-5} - local wait=0 + local wait=0 - while true; do + while true; do - eval $fn $quiet && return 0 + eval $fn $quiet && return 0 - wait=$((wait + sleep)) - [ $wait -lt $max ] || return 1 - echo waiting $fn, $((max - wait)) secs left ... - sleep $sleep - done + [ $wait -lt $max ] || return 1 + echo waiting $fn, $((max - wait)) secs left ... + wait=$((wait + sleep)) + [ $wait -gt $max ] && ((sleep -= wait - max)) + sleep $sleep + done } check_network() { @@ -6469,10 +6638,10 @@ check_mds() { } reset_fail_loc () { - echo -n "Resetting fail_loc on all nodes..." - do_nodes $(comma_list $(nodes_list)) "lctl set_param -n fail_loc=0 \ - fail_val=0 2>/dev/null" || true - echo done. + #echo -n "Resetting fail_loc on all nodes..." + do_nodes --quiet $(comma_list $(nodes_list)) \ + "lctl set_param -n fail_loc=0 fail_val=0 2>/dev/null" || true + #echo done. } @@ -6481,7 +6650,8 @@ reset_fail_loc () { # Also appends a timestamp and prepends the testsuite name. 
# -EQUALS="====================================================================================================" +# ======================================================== 15:06:12 (1624050372) +EQUALS="========================================================" banner() { msg="== ${TESTSUITE} $*" last=${msg: -1:1} @@ -6497,7 +6667,7 @@ check_dmesg_for_errors() { ldiskfs_check_descriptors: Checksum for group 0 failed\|\ group descriptors corrupted" - res=$(do_nodes $(comma_list $(nodes_list)) "dmesg" | grep "$errors") + res=$(do_nodes -q $(comma_list $(nodes_list)) "dmesg" | grep "$errors") [ -z "$res" ] && return 0 echo "Kernel error detected: $res" return 1 @@ -6658,7 +6828,7 @@ check_grant() { # sync all the data and make sure no pending data on server do_nodes $clients sync - clients_up # initiate all idling connections + do_nodes $clients $LFS df # initiate all idling connections # get client grant cli_grant=$(grant_from_clients $clients) @@ -6705,7 +6875,7 @@ ostuuid_from_index() } ostname_from_index() { - local uuid=$(ostuuid_from_index $1) + local uuid=$(ostuuid_from_index $1 $2) echo ${uuid/_UUID/} } @@ -7263,15 +7433,18 @@ restore_lustre_params() { check_node_health() { local nodes=${1:-$(comma_list $(nodes_list))} - - for node in ${nodes//,/ }; do - check_network "$node" 5 - if [ $? -eq 0 ]; then - do_node $node "$LCTL get_param catastrophe 2>&1" | - grep -q "catastrophe=1" && - error "$node:LBUG/LASSERT detected" || true - fi - done + local health=$TMP/node_health.$$ + + do_nodes -q $nodes "$LCTL get_param catastrophe 2>&1" | tee $health | + grep "catastrophe=1" && error "LBUG/LASSERT detected" + # Only check/report network health if get_param isn't reported, since + # *clearly* the network is working if get_param returned something. 
+ if (( $(grep -c catastro $health) != $(wc -w <<< ${nodes//,/ }) )); then + for node in ${nodes//,/}; do + check_network $node 5 + done + fi + rm -f $health } mdsrate_cleanup () { @@ -7336,48 +7509,49 @@ get_osc_import_name() { } _wait_import_state () { - local expected=$1 - local CONN_PROC=$2 - local maxtime=${3:-$(max_recovery_time)} - local error_on_failure=${4:-1} - local CONN_STATE - local i=0 + local expected="$1" + local CONN_PROC="$2" + local maxtime=${3:-$(max_recovery_time)} + local err_on_fail=${4:-1} + local CONN_STATE + local i=0 CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq) - while ! echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do - if [ "${expected}" == "DISCONN" ]; then - # for disconn we can check after proc entry is removed - [ "x${CONN_STATE}" == "x" ] && return 0 - # with AT enabled, we can have connect request timeout near of - # reconnect timeout and test can't see real disconnect - [ "${CONN_STATE}" == "CONNECTING" ] && return 0 - fi - if [ $i -ge $maxtime ]; then - [ $error_on_failure -ne 0 ] && \ - error "can't put import for $CONN_PROC into ${expected}" \ - "state after $i sec, have ${CONN_STATE}" - return 1 - fi - sleep 1 - # Add uniq for multi-mount case - CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq) - i=$(($i + 1)) - done + while ! 
echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do + if [[ "${expected}" == "DISCONN" ]]; then + # for disconn we can check after proc entry is removed + [[ -z "${CONN_STATE}" ]] && return 0 + # with AT, we can have connect request timeout near + # reconnect timeout and test can't see real disconnect + [[ "${CONN_STATE}" == "CONNECTING" ]] && return 0 + fi + if (( $i >= $maxtime )); then + (( $err_on_fail != 0 )) && + error "can't put import for $CONN_PROC into ${expected} state after $i sec, have ${CONN_STATE}" + return 1 + fi + sleep 1 + # Add uniq for multi-mount case + CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | + cut -f2 | uniq) + i=$((i + 1)) + done - log "$CONN_PROC in ${CONN_STATE} state after $i sec" - return 0 + log "$CONN_PROC in ${CONN_STATE} state after $i sec" + return 0 } wait_import_state() { - local state=$1 - local params=$2 - local maxtime=${3:-$(max_recovery_time)} - local error_on_failure=${4:-1} - local param - - for param in ${params//,/ }; do - _wait_import_state $state $param $maxtime $error_on_failure || return - done + local expected="$1" + local params="$2" + local maxtime=${3:-$(max_recovery_time)} + local err_on_fail=${4:-1} + local param + + for param in ${params//,/ }; do + _wait_import_state "$expected" "$param" $maxtime $err_on_fail || + return + done } wait_import_state_mount() { @@ -7385,7 +7559,7 @@ wait_import_state_mount() { return 0 fi - wait_import_state $* + wait_import_state "$@" } # One client request could be timed out because server was not ready @@ -7584,11 +7758,10 @@ do_rpc_nodes () { } wait_clients_import_state () { - local list=$1 - local facet=$2 - local expected=$3 - - local facets=$facet + local list="$1" + local facet="$2" + local expected="$3" + local facets="$facet" if [ "$FAILURE_MODE" = HARD ]; then facets=$(facets_on_host $(facet_active_host $facet)) @@ -7599,11 +7772,11 @@ wait_clients_import_state () { local proc_path case $facet in ost* ) proc_path="osc.$(get_clientosc_proc_path \ - 
$label).ost_server_uuid" ;; + $label).ost_server_uuid" ;; mds* ) proc_path="mdc.$(get_clientmdc_proc_path \ - $label).mds_server_uuid" ;; + $label).mds_server_uuid" ;; mgs* ) proc_path="mgc.$(get_clientmgc_proc_path \ - $label).mgs_server_uuid" ;; + $label).mgs_server_uuid" ;; *) error "unknown facet!" ;; esac @@ -8529,7 +8702,7 @@ get_block_count() { # ldiskfs xattrs over one block in size. Allow both the historical # Lustre feature name (large_xattr) and the upstream name (ea_inode). large_xattr_enabled() { - [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 1 + [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 0 local mds_dev=$(mdsdevname ${SINGLEMDS//mds/}) @@ -8578,7 +8751,6 @@ mds_backup_restore() { local devname=$(mdsdevname $(facet_number $facet)) local mntpt=$(facet_mntpt brpt) local rcmd="do_facet $facet" - local metaea=${TMP}/backup_restore.ea local metadata=${TMP}/backup_restore.tgz local opts=${MDS_MOUNT_FS_OPTS} local svc=${facet}_svc @@ -8592,41 +8764,36 @@ mds_backup_restore() { # step 1: build mount point ${rcmd} mkdir -p $mntpt # step 2: cleanup old backup - ${rcmd} rm -f $metaea $metadata + ${rcmd} rm -f $metadata # step 3: mount dev - ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 1 + ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 3 if [ ! -z $igif ]; then # step 3.5: rm .lustre - ${rcmd} rm -rf $mntpt/ROOT/.lustre || return 1 + ${rcmd} rm -rf $mntpt/ROOT/.lustre || return 3 fi - # step 4: backup metaea - echo "backup EA" - ${rcmd} "cd $mntpt && getfattr -R -d -m '.*' -P . > $metaea && cd -" || - return 2 - # step 5: backup metadata + # step 4: backup metadata echo "backup data" - ${rcmd} tar zcf $metadata -C $mntpt/ . > /dev/null 2>&1 || return 3 - # step 6: umount - ${rcmd} $UMOUNT $mntpt || return 4 - # step 8: reformat dev + ${rcmd} tar zcf $metadata --xattrs --xattrs-include="trusted.*" \ + --sparse -C $mntpt/ . 
> /dev/null 2>&1 || return 4 + # step 5: umount + ${rcmd} $UMOUNT $mntpt || return 5 + # step 6: reformat dev echo "reformat new device" format_mdt $(facet_number $facet) - # step 9: mount dev + # step 7: mount dev ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 7 - # step 10: restore metadata + # step 8: restore metadata echo "restore data" - ${rcmd} tar zxfp $metadata -C $mntpt > /dev/null 2>&1 || return 8 - # step 11: restore metaea - echo "restore EA" - ${rcmd} "cd $mntpt && setfattr --restore=$metaea && cd - " || return 9 - # step 12: remove recovery logs + ${rcmd} tar zxfp $metadata --xattrs --xattrs-include="trusted.*" \ + --sparse -C $mntpt > /dev/null 2>&1 || return 8 + # step 9: remove recovery logs echo "remove recovery logs" ${rcmd} rm -fv $mntpt/OBJECTS/* $mntpt/CATALOGS - # step 13: umount dev + # step 10: umount dev ${rcmd} $UMOUNT $mntpt || return 10 - # step 14: cleanup tmp backup + # step 11: cleanup tmp backup ${rcmd} rm -f $metaea $metadata - # step 15: reset device label - it's not virgin on + # step 12: reset device label - it's not virgin on ${rcmd} e2label $devname ${!svc} } @@ -9444,8 +9611,8 @@ changelog_register() { error "$mdt: changelog_mask=+hsm failed: $?" local cl_user - cl_user=$(do_facet $facet \ - $LCTL --device $mdt changelog_register -n) || + cl_user=$(do_facet $facet $LCTL --device $mdt \ + changelog_register -n $@) || error "$mdt: register changelog user failed: $?" stack_trap "__changelog_deregister $facet $cl_user" EXIT @@ -10461,8 +10628,156 @@ function restore_opencache() } # LU-13417: XXX lots of tests assume the directory to be created under MDT0, -# using this function to create directory under MDT0 explicitly. -# Don't use it in new tests, and remove it from old tests. +# created on MDT0, use this function to create directory on specific MDT +# explicitly, and set default LMV to create subdirs on the same MDT too. 
# Create one or more single-stripe directories on a given MDT.
#
# usage: mkdir_on_mdt -i <mdt_index> <dir>...
#
# Globals:   LFS (read), MDSCOUNT (read)
# Arguments: -i <mdt_index>  MDT index passed to 'lfs mkdir -i' (required)
#            <dir>...        directories to create
# Returns:   1 if -i is missing. Otherwise 0 when MDSCOUNT < 2, else the
#            status of 'lfs setdirstripe'.
#            NOTE: the status of 'lfs mkdir' itself is not propagated;
#            callers in this file invoke this on already existing
#            directories (see exhaust_all_precreations).
mkdir_on_mdt() {
	local mdt
	local opt		# keep the getopts variable out of the caller's scope
	local OPTIND=1

	while getopts "i:" opt "$@"; do
		case $opt in
			i) mdt=$OPTARG;;
		esac
	done

	shift $((OPTIND - 1))

	# without an index the command line below would be malformed
	# ("lfs mkdir -i -c 1 ..."), so reject it explicitly
	if [[ -z "$mdt" ]]; then
		echo "mkdir_on_mdt: missing -i <mdt_index>" >&2
		return 1
	fi

	$LFS mkdir -i "$mdt" -c 1 "$@"
	# setting default LMV in non-DNE system will cause sanity-quota 41 fail
	((MDSCOUNT < 2)) || $LFS setdirstripe -D -i "$mdt" -c 1 "$@"
}

# Create directories on MDT index 0; thin wrapper around mkdir_on_mdt.
#
# usage: mkdir_on_mdt0 <dir>...
mkdir_on_mdt0() {
	mkdir_on_mdt -i0 "$@"
}

# Wait for nodemap synchronization
#
# usage: wait_nm_sync <nodemap_name> [key] [value] [opt]
#
# Arguments: $1 nodemap name, or the literal "active" to check nodemap.active
#            $2 optional key; the parameter checked is nodemap.<name>.<key>
#            $3 optional expected value; when empty the value is read from
#               the MGS and the MGS node itself is skipped in the comparison
#            $4 optional extra 'lctl get_param' option, or the literal
#               "inactive" to check even when nodemap is not activated
# Globals:   LCTL, mgs_HOST, NETTYPE (read)
# Outputs:   progress messages on stdout; calls error() when the servers
#            still disagree after the last attempt
wait_nm_sync() {
	local nodemap_name=$1
	local key=$2
	local value=$3
	local opt=$4
	local proc_param
	local is_active=$(do_facet mgs $LCTL get_param -n nodemap.active)
	local max_retries=10
	local is_sync
	local out1=""
	local out2
	local mgs_ip=$(host_nids_address $mgs_HOST $NETTYPE | cut -d' ' -f1)
	local mgs_host
	local node
	local node_ip
	local i

	if [ "$nodemap_name" == "active" ]; then
		proc_param="active"
	elif [ -z "$key" ]; then
		proc_param=${nodemap_name}
	else
		proc_param="${nodemap_name}.${key}"
	fi
	if [ "$opt" == "inactive" ]; then
		# check nm sync even if nodemap is not activated
		is_active=1
		opt=""
	fi
	(( is_active == 0 )) && [ "$proc_param" != "active" ] && return

	# $opt is left unquoted on purpose: when empty it must vanish from
	# the command line instead of becoming an empty argument
	if [ -z "$value" ]; then
		out1=$(do_facet mgs $LCTL get_param $opt \
			nodemap.${proc_param} 2>/dev/null)
		echo "On MGS ${mgs_ip}, ${proc_param} = $out1"
	else
		out1=$value
	fi

	# if servers run on the same node, it is impossible to tell if they get
	# synced with the mgs, so just wait an arbitrary 10 seconds
	mgs_host=$(facet_active_host mgs)
	if [ "$mgs_host" == "$(facet_active_host mds)" ] &&
	   [ "$mgs_host" == "$(facet_active_host ost1)" ]; then
		echo "waiting 10 secs for sync"
		sleep 10
		return
	fi

	# wait up to $max_retries seconds for other servers to sync with mgs
	for i in $(seq 1 $max_retries); do
		is_sync=true
		for node in $(all_server_nodes); do
			node_ip=$(host_nids_address $node $NETTYPE |
				  cut -d' ' -f1)

			# the reference value came from the MGS itself, so
			# there is nothing to compare on that node
			if [ -z "$value" ]; then
				[ "$node_ip" == "$mgs_ip" ] && continue
			fi

			out2=$(do_node $node_ip $LCTL get_param $opt \
				nodemap.$proc_param 2>/dev/null)
			echo "On $node ${node_ip}, ${proc_param} = $out2"
			[ "$out1" != "$out2" ] && is_sync=false && break
		done
		$is_sync && break
		sleep 1
	done
	if ! $is_sync; then
		echo MGS
		echo "$out1"
		echo "OTHER - IP: $node_ip"
		echo "$out2"
		error "mgs and $nodemap_name ${key} mismatch, $i attempts"
	fi
	echo "waited $((i - 1)) seconds for sync"
}

# Create files under <dir>/<ostname> until the objects preallocated for one
# OST are used up.
#
# usage: consume_precreations <dir> <mds_facet> <ost_index> [extra]
#
# Arguments: $1 parent directory
#            $2 MDS facet whose osp.*.prealloc_{last,next}_id are read
#            $3 OST index the new directory is striped on
#            $4 number of files to create beyond the preallocated range
#               (default 2)
consume_precreations() {
	local dir=$1
	local mfacet=$2
	local OSTIDX=$3
	local extra=${4:-2}
	local OST=$(ostname_from_index $OSTIDX $dir)

	test_mkdir -p "$dir/${OST}"
	$LFS setstripe -i "$OSTIDX" -c 1 "${dir}/${OST}"

	# on the mdt's osc
	local mdtosc_proc=$(get_mdtosc_proc_path $mfacet $OST)
	local last_id=$(do_facet $mfacet $LCTL get_param -n \
			osp.$mdtosc_proc.prealloc_last_id)
	local next_id=$(do_facet $mfacet $LCTL get_param -n \
			osp.$mdtosc_proc.prealloc_next_id)
	echo "Creating to objid $last_id on ost $OST..."
	createmany -o "$dir/${OST}/f" $next_id $((last_id - next_id + extra))
}

# Use up the precreated objects of one OST, with fail_loc 0x215 set on that
# OST while they are consumed, then leave the OST with fail_loc=<FAILLOC>.
#
# usage: __exhaust_precreations <ost_index> <fail_loc> [fail_val]
#
# Arguments: $1 OST index
#            $2 fail_loc value set on the OST once precreations are consumed
#            $3 fail_val used together with 0x215 (default: the OST index)
# Globals:   DIR, tdir, LFS, LCTL (read)
__exhaust_precreations() {
	local OSTIDX=$1
	local FAILLOC=$2
	local FAILIDX=${3:-$OSTIDX}
	local ofacet=ost$((OSTIDX + 1))

	mkdir_on_mdt0 "$DIR/$tdir"
	local mdtidx=$($LFS getstripe -m "$DIR/$tdir")
	local mfacet=mds$((mdtidx + 1))
	echo OSTIDX=$OSTIDX MDTIDX=$mdtidx

	local mdtosc_proc=$(get_mdtosc_proc_path $mfacet)
	# the pattern is quoted so it cannot be expanded against the local
	# working directory before it is handed to lctl
	do_facet $mfacet $LCTL get_param "osp.$mdtosc_proc.prealloc*"

#define OBD_FAIL_OST_ENOSPC              0x215
	do_facet $ofacet $LCTL set_param fail_val=$FAILIDX fail_loc=0x215

	consume_precreations "$DIR/$tdir" $mfacet $OSTIDX

	do_facet $mfacet $LCTL get_param "osp.$mdtosc_proc.prealloc*"
	do_facet $ofacet $LCTL set_param fail_loc=$FAILLOC
}

# Exhaust the precreations of a single OST, then call sleep_maxage.
#
# usage: exhaust_precreations <ost_index> <fail_loc> [fail_val]
exhaust_precreations() {
	__exhaust_precreations "$1" "$2" "$3"
	sleep_maxage
}

# Exhaust the precreations of OST indices 0..OSTCOUNT-1 (fail_val is -1 for
# each of them), then call sleep_maxage once.
#
# usage: exhaust_all_precreations <fail_loc>
exhaust_all_precreations() {
	local i
	for (( i=0; i < OSTCOUNT; i++ )) ; do
		__exhaust_precreations $i "$1" -1
	done
	sleep_maxage
}