X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Ftest-framework.sh;h=b7ef7f4185c29b74c70e6c8d33c270b038630ef5;hp=e499d7f36675a4179395396bcbcf5a230472471d;hb=0a338970c2c73e14cc9be65d360de85be28ca488;hpb=7b569574a484bb781ed5796040e0eb357aaeefb9 diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index e499d7f..b7ef7f4 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -23,6 +23,11 @@ export LOAD_LLOOP=${LOAD_LLOOP:-false} #export PDSH="pdsh -S -Rssh -w" export MOUNT_CMD=${MOUNT_CMD:-"mount -t lustre"} +export UMOUNT=${UMOUNT:-"umount -d"} +# sles12 umount has a issue with -d option +[ -e /etc/SuSE-release ] && grep -w VERSION /etc/SuSE-release | grep -wq 12 && { + export UMOUNT="umount" +} # function used by scripts run on remote nodes LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} @@ -49,16 +54,25 @@ fi [ -z "$MODPROBECONF" -a -f /etc/modprobe.conf ] && MODPROBECONF=/etc/modprobe.conf +sanitize_parameters() { + for i in DIR DIR1 DIR2 MOUNT MOUNT1 MOUNT2 + do + local path=${!i} + if [ -d "$path" ]; then + eval export $i=$(echo $path | sed -E 's/\/+$//g') + fi + done +} assert_DIR () { - local failed="" - [[ $DIR/ = $MOUNT/* ]] || \ - { failed=1 && echo "DIR=$DIR not in $MOUNT. Aborting."; } - [[ $DIR1/ = $MOUNT1/* ]] || \ - { failed=1 && echo "DIR1=$DIR1 not in $MOUNT1. Aborting."; } - [[ $DIR2/ = $MOUNT2/* ]] || \ - { failed=1 && echo "DIR2=$DIR2 not in $MOUNT2. Aborting"; } + local failed="" + [[ $DIR/ = $MOUNT/* ]] || + { failed=1 && echo "DIR=$DIR not in $MOUNT. Aborting."; } + [[ $DIR1/ = $MOUNT1/* ]] || + { failed=1 && echo "DIR1=$DIR1 not in $MOUNT1. Aborting."; } + [[ $DIR2/ = $MOUNT2/* ]] || + { failed=1 && echo "DIR2=$DIR2 not in $MOUNT2. Aborting"; } - [ -n "$failed" ] && exit 99 || true + [ -n "$failed" ] && exit 99 || true } usage() { @@ -71,7 +85,6 @@ usage() { print_summary () { trap 0 [ -z "$DEFAULT_SUITES"] && return 0 - [ "$TESTSUITE" == "lfsck" ] && return 0 [ -n "$ONLY" ] && echo "WARNING: ONLY is set to $(echo $ONLY)" local details local form="%-13s %-17s %-9s %s %s\n" @@ -193,9 +206,8 @@ init_test_env() { fi fi - export LFSCK_BIN=${LFSCK_BIN:-lfsck} - export LFSCK_ALWAYS=${LFSCK_ALWAYS:-"no"} # check fs after each test suite - export FSCK_MAX_ERR=4 # File system errors left uncorrected + export LFSCK_ALWAYS=${LFSCK_ALWAYS:-"no"} # check fs after test suite + export FSCK_MAX_ERR=4 # File system errors left uncorrected export ZFS=${ZFS:-zfs} export ZPOOL=${ZPOOL:-zpool} @@ -557,13 +569,15 @@ load_modules_local() { load_module lov/lov load_module mgc/mgc load_module obdecho/obdecho - if ! client_only; then - SYMLIST=/proc/kallsyms - grep -q crc16 $SYMLIST || { modprobe crc16 2>/dev/null || true; } - grep -q -w jbd $SYMLIST || { modprobe jbd 2>/dev/null || true; } - grep -q -w jbd2 $SYMLIST || { modprobe jbd2 2>/dev/null || true; } + if ! client_only; then + SYMLIST=/proc/kallsyms + grep -q crc16 $SYMLIST || + { modprobe crc16 2>/dev/null || true; } + grep -q -w jbd2 $SYMLIST || + { modprobe jbd2 2>/dev/null || true; } load_module lfsck/lfsck - [ "$LQUOTA" != "no" ] && load_module quota/lquota $LQUOTAOPTS + [ "$LQUOTA" != "no" ] && + load_module quota/lquota $LQUOTAOPTS if [[ $(node_fstypes $HOSTNAME) == *zfs* ]]; then modprobe zfs load_module osd-zfs/osd_zfs @@ -571,6 +585,8 @@ load_modules_local() { if [[ $(node_fstypes $HOSTNAME) == *ldiskfs* ]]; then grep -q exportfs_decode_fh $SYMLIST || { modprobe exportfs 2> /dev/null || true; } + grep -q -w mbcache $SYMLIST || + { modprobe mbcache 2>/dev/null || true; } load_module ../ldiskfs/ldiskfs load_module osd-ldiskfs/osd_ldiskfs fi @@ -582,7 +598,7 @@ load_modules_local() { load_module osp/osp load_module ofd/ofd load_module osp/osp - fi + fi load_module llite/lustre llite_lloop_enabled && load_module llite/llite_lloop @@ -594,7 +610,7 @@ load_modules_local() { # 'mount' doesn't look in $PATH, just sbin local mount_lustre=$LUSTRE/utils/mount.lustre if [ -f $mount_lustre ]; then - local sbin_mount=/sbin/mount.lustre + local sbin_mount=$(readlink -f /sbin)/mount.lustre if grep -qw "$sbin_mount" /proc/mounts; then cmp -s $mount_lustre $sbin_mount || umount $sbin_mount fi @@ -658,7 +674,7 @@ unload_modules() { fi fi - local sbin_mount=/sbin/mount.lustre + local sbin_mount=$(readlink -f /sbin)/mount.lustre if grep -qe "$sbin_mount " /proc/mounts; then umount $sbin_mount || true [ -s $sbin_mount ] && ! grep -q "STUB MARK" $sbin_mount || @@ -814,7 +830,7 @@ facet_type() { facet_number() { local facet=$1 - if [ $facet == mgs ]; then + if [ $facet == mgs ] || [ $facet == client ]; then return 1 fi @@ -1113,7 +1129,7 @@ get_osd_param() { local name=$3 do_nodes $nodes "$LCTL get_param -n obdfilter.$device.$name \ - osd-*.$device.$name 2>&1" | grep -v 'Found no match' + osd-*.$device.$name 2>&1" | grep -v 'error:' } set_osd_param() { @@ -1123,7 +1139,7 @@ set_osd_param() { local value=$4 do_nodes $nodes "$LCTL set_param -n obdfilter.$device.$name=$value \ - osd-*.$device.$name=$value 2>&1" | grep -v 'Found no match' + osd-*.$device.$name=$value 2>&1" | grep -v 'error:' } set_debug_size () { @@ -1220,6 +1236,10 @@ mount_facet() { local opt=${facet}_opt local mntpt=$(facet_mntpt $facet) local opts="${!opt} $@" + local fstype=$(facet_fstype $facet) + local devicelabel + + module_loaded lustre || load_modules if [ $(facet_fstype $facet) == ldiskfs ] && ! do_facet $facet test -b ${!dev}; then @@ -1231,6 +1251,16 @@ mount_facet() { import_zpool $facet || return ${PIPESTATUS[0]} fi + case $fstype in + ldiskfs) + devicelabel=$(do_facet ${facet} "$E2LABEL ${!dev}");; + zfs) + devicelabel=$(do_facet ${facet} "$ZFS get -H -o value \ + lustre:svname ${!dev}");; + *) + error "unknown fstype!";; + esac + echo "Starting ${facet}: $opts ${!dev} $mntpt" # for testing LU-482 error handling in mount_facets() and test_0a() if [ -f $TMP/test-lu482-trigger ]; then @@ -1240,40 +1270,68 @@ mount_facet() { ${!dev} $mntpt" RC=${PIPESTATUS[0]} fi + if [ $RC -ne 0 ]; then echo "Start of ${!dev} on ${facet} failed ${RC}" - else - set_default_debug_facet $facet + return $RC + fi + + set_default_debug_facet $facet if [[ $facet == mds* ]]; then do_facet $facet \ - lctl set_param -n mdt.${FSNAME}*.enable_remote_dir=1 \ - 2>/dev/null + lctl set_param -n mdt.${FSNAME}*.enable_remote_dir=1 2>/dev/null fi - label=$(devicelabel ${facet} ${!dev}) - [ -z "$label" ] && echo no label for ${!dev} && exit 1 - eval export ${facet}_svc=${label} - echo Started ${label} - fi - return $RC + if [[ $opts =~ .*nosvc.* ]]; then + echo "Start ${!dev} without service" + else + + case $fstype in + ldiskfs) + wait_update_facet ${facet} "$E2LABEL ${!dev} \ + 2>/dev/null | grep -E ':[a-zA-Z]{3}[0-9]{4}'" \ + "" || error "${!dev} failed to initialize!";; + zfs) + wait_update_facet ${facet} "$ZFS get -H -o value \ + lustre:svname ${!dev} 2>/dev/null | \ + grep -E ':[a-zA-Z]{3}[0-9]{4}'" "" || + error "${!dev} failed to initialize!";; + + *) + error "unknown fstype!";; + esac + fi + + # commit the device label change to disk + if [[ $devicelabel =~ (:[a-zA-Z]{3}[0-9]{4}) ]]; then + do_facet $facet "sync; sync; sync" + fi + + + label=$(devicelabel ${facet} ${!dev}) + [ -z "$label" ] && echo no label for ${!dev} && exit 1 + eval export ${facet}_svc=${label} + echo Started ${label} + + return $RC } # start facet device options start() { - local facet=$1 - shift - local device=$1 - shift - eval export ${facet}_dev=${device} - eval export ${facet}_opt=\"$@\" + local facet=$1 + shift + local device=$1 + shift + eval export ${facet}_dev=${device} + eval export ${facet}_opt=\"$@\" - local varname=${facet}failover_dev - if [ -n "${!varname}" ] ; then - eval export ${facet}failover_dev=${!varname} - else - eval export ${facet}failover_dev=$device - fi + local varname=${facet}failover_dev + if [ -n "${!varname}" ] ; then + eval export ${facet}failover_dev=${!varname} + else + eval export ${facet}failover_dev=$device + fi local mntpt=$(facet_mntpt $facet) do_facet ${facet} mkdir -p $mntpt @@ -1301,7 +1359,7 @@ stop() { running=$(do_facet ${facet} "grep -c $mntpt' ' /proc/mounts") || true if [ ${running} -ne 0 ]; then echo "Stopping $mntpt (opts:$@) on $HOST" - do_facet ${facet} umount -d $@ $mntpt + do_facet ${facet} $UMOUNT $@ $mntpt fi # umount should block, but we should wait for unrelated obd's @@ -1354,70 +1412,6 @@ quota_type() { return $rc } -# XXX This function is kept for interoperability with old server (< 2.3.50), -# it should be removed whenever we drop the interoperability for such -# server. -restore_quota_old() { - local mntpt=${1:-$MOUNT} - local quota_type=$(quota_type $FSNAME | grep MDT | cut -d "=" -f2) - if [ ! "$old_QUOTA_TYPE" ] || - [ "$quota_type" = "$old_QUOTA_TYPE" ]; then - return - fi - quota_save_version $old_QUOTA_TYPE -} - -# XXX This function is kept for interoperability with old server (< 2.3.50), -# it should be removed whenever we drop the interoperability for such -# server. -setup_quota_old(){ - local mntpt=$1 - - # no quota enforcement for now and accounting works out of the box - return - - # We need save the original quota_type params, and restore them after testing - - # Suppose that quota type the same on mds and ost - local quota_type=$(quota_type | grep MDT | cut -d "=" -f2) - [ ${PIPESTATUS[0]} -eq 0 ] || error "quota_type failed!" - echo "[HOST:$HOSTNAME] [old_quota_type:$quota_type] [new_quota_type:$QUOTA_TYPE]" - if [ "$quota_type" != "$QUOTA_TYPE" ]; then - export old_QUOTA_TYPE=$quota_type - quota_save_version $QUOTA_TYPE - else - qtype=$(tr -c -d "ug" <<< $QUOTA_TYPE) - $LFS quotacheck -$qtype $mntpt || error "quotacheck has failed for $type" - fi - - local quota_usrs=$QUOTA_USERS - - # get_filesystem_size - local disksz=$(lfs_df $mntpt | grep "summary" | awk '{print $2}') - local blk_soft=$((disksz + 1024)) - local blk_hard=$((blk_soft + blk_soft / 20)) # Go 5% over - - local Inodes=$(lfs_df -i $mntpt | grep "summary" | awk '{print $2}') - local i_soft=$Inodes - local i_hard=$((i_soft + i_soft / 20)) - - echo "Total disk size: $disksz block-softlimit: $blk_soft block-hardlimit: - $blk_hard inode-softlimit: $i_soft inode-hardlimit: $i_hard" - - local cmd - for usr in $quota_usrs; do - echo "Setting up quota on $HOSTNAME:$mntpt for $usr..." - for type in u g; do - cmd="$LFS setquota -$type $usr -b $blk_soft -B $blk_hard -i $i_soft -I $i_hard $mntpt" - echo "+ $cmd" - eval $cmd || error "$cmd FAILED!" - done - # display the quota status - echo "Quota settings for $usr : " - $LFS quota -v -u $usr $mntpt || true - done -} - # get mdt quota type mdt_quota_type() { local varsvc=${SINGLEMDS}_svc @@ -1435,11 +1429,6 @@ ost_quota_type() { # restore old quota type settings restore_quota() { - if [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.50) ]; then - restore_quota_old - return - fi - if [ "$old_MDT_QUOTA_TYPE" ]; then do_facet mgs $LCTL conf_param \ $FSNAME.quota.mdt=$old_MDT_QUOTA_TYPE @@ -1475,11 +1464,6 @@ mdt_free_inodes() { } setup_quota(){ - if [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.50) ]; then - setup_quota_old $1 - return - fi - local mntpt=$1 # save old quota type & set new quota type @@ -1528,25 +1512,37 @@ setup_quota(){ } zconf_mount() { - local client=$1 - local mnt=$2 - local opts=${3:-$MOUNT_OPTS} - opts=${opts:+-o $opts} - local flags=${4:-$MOUNT_FLAGS} - - local device=$MGSNID:/$FSNAME - if [ -z "$mnt" -o -z "$FSNAME" ]; then - echo Bad zconf mount command: opt=$flags $opts dev=$device mnt=$mnt - exit 1 - fi - - echo "Starting client: $client: $flags $opts $device $mnt" - do_node $client mkdir -p $mnt - do_node $client $MOUNT_CMD $flags $opts $device $mnt || return 1 - - set_default_debug_nodes $client + local client=$1 + local mnt=$2 + local opts=${3:-$MOUNT_OPTS} + opts=${opts:+-o $opts} + local flags=${4:-$MOUNT_FLAGS} + + local device=$MGSNID:/$FSNAME$FILESET + if [ -z "$mnt" -o -z "$FSNAME" ]; then + echo "Bad mount command: opt=$flags $opts dev=$device " \ + "mnt=$mnt" + exit 1 + fi + + echo "Starting client: $client: $flags $opts $device $mnt" + do_node $client mkdir -p $mnt + if [ -n "$FILESET" -a -z "$SKIP_FILESET" ];then + do_node $client $MOUNT_CMD $flags $opts $MGSNID:/$FSNAME \ + $mnt || return 1 + #disable FILESET if not supported + do_nodes $client lctl get_param -n \ + mdc.$FSNAME-MDT0000*.import | grep -q subtree || + device=$MGSNID:/$FSNAME + do_node $client mkdir -p $mnt/$FILESET + do_node $client "! grep -q $mnt' ' /proc/mounts || + umount $mnt" + fi + do_node $client $MOUNT_CMD $flags $opts $device $mnt || return 1 + + set_default_debug_nodes $client - return 0 + return 0 } zconf_umount() { @@ -1636,21 +1632,35 @@ sanity_mount_check () { # mount clients if not mouted zconf_mount_clients() { - local clients=$1 - local mnt=$2 - local opts=${3:-$MOUNT_OPTS} - opts=${opts:+-o $opts} - local flags=${4:-$MOUNT_FLAGS} - - local device=$MGSNID:/$FSNAME - if [ -z "$mnt" -o -z "$FSNAME" ]; then - echo Bad zconf mount command: opt=$flags $opts dev=$device mnt=$mnt - exit 1 - fi - - echo "Starting client $clients: $flags $opts $device $mnt" - - do_nodes $clients " + local clients=$1 + local mnt=$2 + local opts=${3:-$MOUNT_OPTS} + opts=${opts:+-o $opts} + local flags=${4:-$MOUNT_FLAGS} + + local device=$MGSNID:/$FSNAME$FILESET + if [ -z "$mnt" -o -z "$FSNAME" ]; then + echo "Bad conf mount command: opt=$flags $opts dev=$device " \ + "mnt=$mnt" + exit 1 + fi + + echo "Starting client $clients: $flags $opts $device $mnt" + if [ -n "$FILESET" -a ! -n "$SKIP_FILESET" ]; then + do_nodes $clients "! grep -q $mnt' ' /proc/mounts || + umount $mnt" + do_nodes $clients $MOUNT_CMD $flags $opts $MGSNID:/$FSNAME \ + $mnt || return 1 + #disable FILESET if not supported + do_nodes $clients lctl get_param -n \ + mdc.$FSNAME-MDT0000*.import | grep -q subtree || + device=$MGSNID:/$FSNAME + do_nodes $clients mkdir -p $mnt/$FILESET + do_nodes $clients "! grep -q $mnt' ' /proc/mounts || + umount $mnt" + fi + + do_nodes $clients " running=\\\$(mount | grep -c $mnt' '); rc=0; if [ \\\$running -eq 0 ] ; then @@ -1660,12 +1670,12 @@ if [ \\\$running -eq 0 ] ; then fi; exit \\\$rc" || return ${PIPESTATUS[0]} - echo "Started clients $clients: " - do_nodes $clients "mount | grep $mnt' '" + echo "Started clients $clients: " + do_nodes $clients "mount | grep $mnt' '" - set_default_debug_nodes $clients + set_default_debug_nodes $clients - return 0 + return 0 } zconf_umount_clients() { @@ -2107,16 +2117,19 @@ wait_update () { } wait_update_facet() { + local verbose= + [ "$1" = "--verbose" ] && verbose="$1" && shift + local facet=$1 shift - wait_update $(facet_active_host $facet) "$@" + wait_update $verbose $(facet_active_host $facet) "$@" } sync_all_data() { do_nodes $(comma_list $(mdts_nodes)) \ - "lctl set_param -n osd*.*MDT*.force_sync 1" + "lctl set_param -n osd*.*MDT*.force_sync=1" do_nodes $(comma_list $(osts_nodes)) \ - "lctl set_param -n osd*.*OS*.force_sync 1" 2>&1 | + "lctl set_param -n osd*.*OS*.force_sync=1" 2>&1 | grep -v 'Found no match' } @@ -2432,6 +2445,13 @@ affected_facets () { } facet_failover() { + local E2FSCK_ON_MDT0=false + if [ "$1" == "--fsck" ]; then + shift + [ $(facet_fstype $SINGLEMDS) == ldiskfs ] && + E2FSCK_ON_MDT0=true + fi + local facets=$1 local sleep_time=$2 local -a affecteds @@ -2465,6 +2485,9 @@ facet_failover() { shutdown_facet $facet done + $E2FSCK_ON_MDT0 && (run_e2fsck $(facet_active_host $SINGLEMDS) \ + $(mdsdevname 1) "-n" || error "Running e2fsck") + for ((index=0; index<$total; index++)); do facet=$(echo ${affecteds[index]} | tr -s " " | cut -d"," -f 1) echo reboot facets: ${affecteds[index]} @@ -3222,7 +3245,7 @@ unmount_ldiskfs() { local dev=$(facet_device $facet) local mnt=$(facet_mntpt $facet) - do_facet $facet umount -d $mnt + do_facet $facet $UMOUNT $mnt } var_name() { @@ -3965,12 +3988,13 @@ is_empty_fs() { } check_and_setup_lustre() { - nfs_client_mode && return + sanitize_parameters + nfs_client_mode && return cifs_client_mode && return - local MOUNTED=$(mounted_lustre_filesystems) + local MOUNTED=$(mounted_lustre_filesystems) - local do_check=true + local do_check=true # 1. # both MOUNT and MOUNT2 are not mounted if ! is_mounted $MOUNT && ! is_mounted $MOUNT2; then @@ -4179,88 +4203,31 @@ check_shared_dir() { return 0 } -# Run e2fsck on MDT and OST(s) to generate databases used for lfsck. -generate_db() { - local i - local ostidx - local dev - local node - - [[ $(lustre_version_code $SINGLEMDS) -ne $(version_code 2.2.0) ]] || - { skip "Lustre 2.2.0 lacks the patch for LU-1255"; exit 0; } - - check_shared_dir $SHARED_DIRECTORY || - error "$SHARED_DIRECTORY isn't a shared directory" - - export MDSDB=$SHARED_DIRECTORY/mdsdb - export OSTDB=$SHARED_DIRECTORY/ostdb - - # DNE is not supported, so when running e2fsck on a DNE filesystem, - # we only pass master MDS parameters. - run_e2fsck $MDTNODE $MDTDEV "-n --mdsdb $MDSDB" - - i=0 - ostidx=0 - OSTDB_LIST="" - for node in $(osts_nodes); do - for dev in ${OSTDEVS[i]}; do - run_e2fsck $node $dev "-n --mdsdb $MDSDB --ostdb $OSTDB-$ostidx" - OSTDB_LIST="$OSTDB_LIST $OSTDB-$ostidx" - ostidx=$((ostidx + 1)) - done - i=$((i + 1)) - done -} - -# Run lfsck on server node if lfsck can't be found on client (LU-2571) -run_lfsck_remote() { - local cmd="$LFSCK_BIN -c -l --mdsdb $MDSDB --ostdb $OSTDB_LIST $MOUNT" - local client=$1 - local mounted=true - local rc=0 - - #Check if lustre is already mounted - do_rpc_nodes $client is_mounted $MOUNT || mounted=false - if ! $mounted; then - zconf_mount $client $MOUNT || - error "failed to mount Lustre on $client" - fi - #Run lfsck - echo $cmd - do_node $client $cmd || rc=$? - #Umount if necessary - if ! $mounted; then - zconf_umount $client $MOUNT || - error "failed to unmount Lustre on $client" - fi - - [ $rc -le $FSCK_MAX_ERR ] || - error "$cmd returned $rc, should be <= $FSCK_MAX_ERR" - echo "lfsck finished with rc=$rc" - - return $rc -} - run_lfsck() { - local facets="client $SINGLEMDS" - local found=false - local facet - local node - local rc=0 - - for facet in $facets; do - node=$(facet_active_host $facet) - if check_progs_installed $node $LFSCK_BIN; then - found=true - break - fi + do_nodes $(comma_list $(mdts_nodes) $(osts_nodes)) \ + $LCTL set_param printk=+lfsck + do_facet $SINGLEMDS "$LCTL lfsck_start -M $FSNAME-MDT0000 -r -A -t all" + + for k in $(seq $MDSCOUNT); do + # wait up to 10+1 minutes for LFSCK to complete + wait_update_facet --verbose mds${k} "$LCTL get_param -n \ + mdd.$(facet_svc mds${k}).lfsck_layout | + awk '/^status/ { print \\\$2 }'" "completed" 600 || + error "MDS${k} layout isn't the expected 'completed'" + wait_update_facet --verbose mds${k} "$LCTL get_param -n \ + mdd.$(facet_svc mds${k}).lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 60 || + error "MDS${k} namespace isn't the expected 'completed'" done - ! $found && error "None of \"$facets\" supports lfsck" - - run_lfsck_remote $node || rc=$? - - rm -rvf $MDSDB* $OSTDB* || true - return $rc + local rep_mdt=$(do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL get_param -n mdd.$FSNAME-*.lfsck_* | + awk '/repaired/ { print $2 }' | calc_sum) + local rep_ost=$(do_nodes $(comma_list $(osts_nodes)) \ + $LCTL get_param -n obdfilter.$FSNAME-*.lfsck_* | + awk '/repaired/ { print $2 }' | calc_sum) + local repaired=$((rep_mdt + rep_ost)) + [ $repaired -eq 0 ] || + error "lfsck repaired $rep_mdt MDT and $rep_ost OST errors" } dump_file_contents() { @@ -4318,11 +4285,10 @@ log_zfs_info() { } check_and_cleanup_lustre() { - if [ "$LFSCK_ALWAYS" = "yes" -a "$TESTSUITE" != "lfsck" ]; then - get_svr_devs - generate_db - run_lfsck - fi + if [ "$LFSCK_ALWAYS" = "yes" -a "$TESTSUITE" != "sanity-lfsck" -a \ + "$TESTSUITE" != "sanity-scrub" ]; then + run_lfsck + fi if is_mounted $MOUNT; then [ -n "$DIR" ] && rm -rf $DIR/[Rdfs][0-9]* || @@ -4653,33 +4619,10 @@ set_nodes_failloc () { } cancel_lru_locks() { - $LCTL mark "cancel_lru_locks $1 start" - - if [ $1 != "MGC" ]; then - for d in $(lctl get_param -N ldlm.namespaces.*.lru_size | - egrep -i $1); do - $LCTL set_param -n $d=clear - done - $LCTL get_param ldlm.namespaces.*.lock_unused_count | egrep -i $1 | - grep -v '=0' - else - for d in $(find \ - /{proc,sys}/fs/lustre/ldlm/namespaces/*$1*/lru_size \ - 2> /dev/null); do - echo "clear" > $d - done - - for d in $(find \ - /{proc,sys}/fs/lustre/ldlm/namespaces/*$1*/lock_unused_count \ - 2> /dev/null); do - if [ $(cat $d) != 0 ]; then - echo "ldlm.namespaces.$(echo "$d" | - cut -f 7 -d'/').lock_unused_count=$(cat $d)" - fi - done - fi - - $LCTL mark "cancel_lru_locks $1 stop" + #$LCTL mark "cancel_lru_locks $1 start" + $LCTL set_param -n ldlm.namespaces.*$1*.lru_size=clear + $LCTL get_param ldlm.namespaces.*$1*.lock_unused_count | grep -v '=0' + #$LCTL mark "cancel_lru_locks $1 stop" } default_lru_size() @@ -4756,20 +4699,17 @@ stop_full_debug_logging() { # prints bash call stack print_stack_trace() { + local skip=${1:-1} echo " Trace dump:" - for (( i=1; i < ${#BASH_LINENO[*]} ; i++ )) ; do - local s=${BASH_SOURCE[$i]} - local l=${BASH_LINENO[$i-1]} - local f=${FUNCNAME[$i]} - echo " = $s:$l:$f()" + for (( i=$skip; i < ${#BASH_LINENO[*]} ; i++ )) ; do + local src=${BASH_SOURCE[$i]} + local lineno=${BASH_LINENO[$i-1]} + local funcname=${FUNCNAME[$i]} + echo " = $src:$lineno:$funcname()" done } -################################## -# Test interface -################################## - -error_noexit() { +report_error() { local TYPE=${TYPE:-"FAIL"} local dump=true @@ -4779,10 +4719,8 @@ error_noexit() { dump=false fi - log " ${TESTSUITE} ${TESTNAME}: @@@@@@ ${TYPE}: $@ " - print_stack_trace >&2 - + (print_stack_trace 2) >&2 mkdir -p $LOGDIR # We need to dump the logs on all nodes if $dump; then @@ -4801,6 +4739,17 @@ error_noexit() { echo "$@" > $LOGDIR/err fi fi + + # cleanup the env for failed tests + reset_fail_loc +} + +################################## +# Test interface +################################## + +error_noexit() { + report_error "$@" } exit_status () { @@ -4812,12 +4761,13 @@ exit_status () { } error() { - error_noexit "$@" + report_error "$@" exit 1 } error_exit() { - error "$@" + report_error "$@" + exit 1 } # use only if we are ignoring failures for this test, bugno required. @@ -4827,11 +4777,11 @@ error_exit() { error_ignore() { local TYPE="IGNORE ($1)" shift - error_noexit "$@" + report_error "$@" } error_and_remount() { - error_noexit "$@" + report_error "$@" remount_client $MOUNT exit 1 } @@ -5043,6 +4993,18 @@ banner() { log "$msg== $(date +"%H:%M:%S (%s)")" } +check_dmesg_for_errors() { + local res + local errors="VFS: Busy inodes after unmount of\|\ +ldiskfs_check_descriptors: Checksum for group 0 failed\|\ +group descriptors corrupted" + + res=$(do_nodes $(comma_list $(nodes_list)) "dmesg" | grep "$errors") + [ -z "$res" ] && return 0 + echo "Kernel error detected: $res" + return 1 +} + # # Run a single test function and cleanup after it. # @@ -5058,12 +5020,17 @@ run_one() { local SAVE_UMASK=`umask` umask 0022 + if ! grep -q $DIR /proc/mounts; then + $SETUP + fi + banner "test $testnum: $message" test_${testnum} || error "test_$testnum failed with $?" cd $SAVE_PWD reset_fail_loc check_grant ${testnum} || error "check_grant $testnum failed with $?" check_catastrophe || error "LBUG/LASSERT detected" + check_dmesg_for_errors || error "Error in dmesg detected" if [ "$PARALLEL" != "yes" ]; then ps auxww | grep -v grep | grep -q multiop && error "multiop still running" @@ -5072,6 +5039,7 @@ run_one() { unset tdir unset tfile umask $SAVE_UMASK + $CLEANUP return 0 } @@ -6184,19 +6152,21 @@ create_pool() { } add_pool_to_list () { - local fsname=${1%%.*} - local poolname=${1##$fsname.} + local fsname=${1%%.*} + local poolname=${1##$fsname.} - local listvar=${fsname}_CREATED_POOLS - eval export ${listvar}=$(expand_list ${!listvar} $poolname) + local listvar=${fsname}_CREATED_POOLS + local temp=${listvar}=$(expand_list ${!listvar} $poolname) + eval export $temp } remove_pool_from_list () { - local fsname=${1%%.*} - local poolname=${1##$fsname.} + local fsname=${1%%.*} + local poolname=${1##$fsname.} - local listvar=${fsname}_CREATED_POOLS - eval export ${listvar}=$(exclude_items_from_list ${!listvar} $poolname) + local listvar=${fsname}_CREATED_POOLS + local temp=${listvar}=$(exclude_items_from_list ${!listvar} $poolname) + eval export $temp } destroy_pool_int() { @@ -6333,6 +6303,31 @@ max_recovery_time() { echo -n $service_time } +recovery_time_min() { + local connection_switch_min=5 + local connection_switch_inc=5 + local connection_switch_max + local reconnect_delay_max + local initial_connect_timeout + local max + local timout_20 + + #connection_switch_max=min(50, max($connection_switch_min,$TIMEOUT) + (($connection_switch_min > $TIMEOUT)) && + max=$connection_switch_min || max=$TIMEOUT + (($max < 50)) && connection_switch_max=$max || connection_switch_max=50 + + #initial_connect_timeout = max(connection_switch_min, obd_timeout/20) + timeout_20=$((TIMEOUT/20)) + (($connection_switch_min > $timeout_20)) && + initial_connect_timeout=$connection_switch_min || + initial_connect_timeout=$timeout_20 + + reconnect_delay_max=$((connection_switch_max + connection_switch_inc + \ + initial_connect_timeout)) + echo $((2 * reconnect_delay_max)) +} + get_clients_mount_count () { local clients=${CLIENTS:-`hostname`} @@ -7006,7 +7001,7 @@ mds_backup_restore() { echo "backup data" ${rcmd} tar zcf $metadata -C $mntpt/ . > /dev/null 2>&1 || return 3 # step 6: umount - ${rcmd} umount -d $mntpt || return 4 + ${rcmd} $UMOUNT $mntpt || return 4 # step 8: reformat dev echo "reformat new device" format_mdt $(facet_number $facet) @@ -7022,7 +7017,7 @@ mds_backup_restore() { echo "remove recovery logs" ${rcmd} rm -fv $mntpt/OBJECTS/* $mntpt/CATALOGS # step 13: umount dev - ${rcmd} umount -d $mntpt || return 10 + ${rcmd} $UMOUNT $mntpt || return 10 # step 14: cleanup tmp backup ${rcmd} rm -f $metaea $metadata # step 15: reset device label - it's not virgin on @@ -7062,7 +7057,7 @@ mds_remove_ois() { done fi # step 4: umount - ${rcmd} umount -d $mntpt || return 2 + ${rcmd} $UMOUNT $mntpt || return 2 # OI files will be recreated when mounted as lustre next time. }