lustre/tests/lnet-selftest.sh

   1 #!/bin/bash
   2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
   3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
   4
   5 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
   6 . $LUSTRE/tests/test-framework.sh
   7 init_test_env $@
   8 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
   9 init_logging
  10
  11 ALWAYS_EXCEPT="$LNET_SELFTEST_EXCEPT"
  12 if [[ $(uname -m) = aarch64 ]]; then
  13         # bug number for skipped test: LU-10073
  14         ALWAYS_EXCEPT+="               smoke"
  15 fi
  16
  17 # Check if running on Ubuntu client
  18 if [ -r /etc/os-release ]; then
  19         if grep -qi ubuntu /etc/os-release; then
  20                 # bug number for skipped test: LU-10073
  21                 ALWAYS_EXCEPT+="               smoke"
  22         fi
  23 fi
  24
  25 [ x$LST = x ] && skip_env "lst not found LST=$LST"
  26
  27 # FIXME: what is the reasonable value here?
  28 lst_LOOP=${lst_LOOP:-100000}
  29 lst_CONCR=${lst_CONCR:-"1 2 4 8"}
  30 lst_SIZES=${lst_SIZES:-"4k 8k 256k 1M"}
  31 if [ "$SLOW" = no ]; then
  32     lst_CONCR="1 8"
  33     lst_SIZES="4k 1M"
  34     lst_LOOP=1000
  35 fi
  36
  37 smoke_DURATION=${smoke_DURATION:-1800}
  38 if [ "$SLOW" = no ]; then
  39     [ $smoke_DURATION -le 300 ] || smoke_DURATION=300
  40 fi
  41
  42 nodes=$(comma_list "$(osts_nodes) $(mdts_nodes)")
  43 lst_SERVERS=${lst_SERVERS:-$(comma_list "$(host_nids_address $nodes $NETTYPE)")}
  44 lst_CLIENTS=${lst_CLIENTS:-$(comma_list "$(host_nids_address $CLIENTS $NETTYPE)")}
  45 interim_umount=false
  46 interim_umount1=false
  47
  48 #
  49 # _restore_mount(): This function calls restore_mount function for "MOUNT" and
  50 # "MOUNT2" paths to mount clients if they were not mounted and were umounted
  51 # in this file earlier.
  52 # Parameter: None
  53 # Returns: None. Exit with error if client mount fails.
  54 #
  55 _restore_mount () {
  56         if $interim_umount && ! is_mounted $MOUNT; then
  57                 restore_mount $MOUNT || error "Restore $MOUNT failed"
  58         fi
  59
  60         if $interim_umount1 && ! is_mounted $MOUNT2; then
  61                 restore_mount $MOUNT2 || error "Restore $MOUNT2 failed"
  62         fi
  63 }
  64
  65 is_mounted () {
  66     local mntpt=$1
  67     local mounted=$(mounted_lustre_filesystems)
  68     echo $mounted' ' | grep -w -q $mntpt' '
  69 }
  70
  71 if local_mode; then
  72    lst_SERVERS=`hostname`
  73    lst_CLIENTS=`hostname`
  74 fi
  75
  76 # FIXME: do we really need to unload lustre modules on all nodes?
  77 # bug 19387, comment 9
  78 # unloading lustre modules is not strictly necessary but unmounting
  79 # /mnt/lustre before running lst would be useful:
  80 # 1) because lustre messages clutter logs - we needn't them for testing LNET
  81 # 2) it's theoretically possible that lst tests congest comm paths so tightly
  82 # that mounted lustre wouldn't able to perform some of its background activities
  83 if is_mounted $MOUNT; then
  84         cleanup_mount $MOUNT || error "Fail to unmount client $MOUNT"
  85         interim_umount=true
  86 fi
  87
  88 if is_mounted $MOUNT2; then
  89         cleanup_mount $MOUNT2 || error "Fail to unmount client $MOUNT2"
  90         interim_umount1=true
  91 fi
  92
  93 build_test_filter
  94
  95 lst_prepare () {
  96     # Workaround for bug 15619
  97     lst_cleanup_all
  98     lst_setup_all
  99 }
 100
 101 # make batch
 102 test_smoke_sub () {
 103     local servers=$1
 104     local clients=$2
 105
 106
 107     local nc=$(echo ${clients//,/ } | wc -w)
 108     local ns=$(echo ${servers//,/ } | wc -w)
 109     echo '#!/bin/bash'
 110     echo 'set -e'
 111
 112     echo 'cleanup () { trap 0; echo killing $1 ... ; kill -9 $1 || true; }'
 113
 114     echo "$LST new_session --timeo 100000 hh"
 115     echo "$LST add_group c $(nids_list $clients)"
 116     echo "$LST add_group s $(nids_list $servers)"
 117     echo "$LST add_batch b"
 118
 119     pre="$LST add_test --batch b --loop $lst_LOOP "
 120     for t in "brw read" "brw write" ; do
 121         for s in $lst_SIZES; do
 122             for c in $lst_CONCR; do
 123                 for d in "${nc}:${ns} --from c --to s" "${ns}:${nc} --from s --to c"; do
 124                     echo -n "$pre"
 125                     echo " --concurrency $c --distribute $d $t check=full size=$s"
 126                  done
 127             done
 128         done
 129     done
 130
 131     for c in $lst_CONCR; do
 132         for d in "${nc}:${ns} --from c --to s" "${ns}:${nc} --from s --to c"; do
 133             echo -n "$pre"
 134             echo " --concurrency $c --distribute $d ping "
 135         done
 136     done
 137
 138     echo $LST run b
 139     echo sleep 1
 140     echo "$LST stat --delay 10 --timeout 10 c s &"
 141     echo 'pid=$!'
 142     echo 'trap "cleanup $pid" INT TERM'
 143     echo sleep $smoke_DURATION
 144     echo 'cleanup $pid'
 145
 146 }
 147
 148 run_lst () {
 149    local file=$1
 150
 151    export LST_SESSION=$$
 152
 153    # start lst
 154    sh $file
 155 }
 156
 157 check_lst_err () {
 158         local log=$1
 159
 160         grep ^Total $log
 161
 162         if awk '/^Total.*nodes/ {print $2}' $log | grep -vq '^0$'; then
 163                 _restore_mount
 164                 error 'lst Error found'
 165         fi
 166 }
 167
 168 test_smoke () {
 169         lst_prepare
 170
 171         local servers=$lst_SERVERS
 172         local clients=$lst_CLIENTS
 173
 174         local runlst=$TMP/smoke.sh
 175
 176         local log=$TMP/$tfile.log
 177         local rc=0
 178
 179         test_smoke_sub $servers $clients 2>&1 > $runlst
 180
 181         cat $runlst
 182
 183         run_lst $runlst | tee $log
 184         rc=${PIPESTATUS[0]}
 185         [ $rc = 0 ] || { _restore_mount; error "$runlst failed: $rc"; }
 186
 187         lst_end_session --verbose | tee -a $log
 188
 189         # error counters in "lst show_error" should be checked
 190         check_lst_err $log
 191         lst_cleanup_all
 192 }
 193 run_test smoke "lst regression test"
 194
 195 complete $SECONDS
 196 _restore_mount
 197 check_and_cleanup_lustre
 198 exit_status