lustre/tests/lnet-selftest.sh

   1 #!/bin/bash
   2
   3 LUSTRE=${LUSTRE:-$(dirname $0)/..}
   4 . $LUSTRE/tests/test-framework.sh
   5 init_test_env $@
   6 init_logging
   7
   8 ALWAYS_EXCEPT="$LNET_SELFTEST_EXCEPT"
   9 if [[ $(uname -m) = ppc64 ]]; then
  10         # bug number for skipped test: LU-10073
  11         ALWAYS_EXCEPT+="               smoke "
  12 fi
  13
  14 build_test_filter
  15
  16 [ x$LST = x ] && skip_env "lst not found LST=$LST"
  17
  18 # FIXME: what is the reasonable value here?
  19 lst_LOOP=${lst_LOOP:-100000}
  20 lst_CONCR=${lst_CONCR:-"1 2 4 8"}
  21 lst_SIZES=${lst_SIZES:-"4k 8k 256k 1M"}
  22 if [ "$SLOW" = no ]; then
  23     lst_CONCR="1 8"
  24     lst_SIZES="4k 1M"
  25     lst_LOOP=1000
  26 fi
  27
  28 smoke_DURATION=${smoke_DURATION:-1800}
  29 if [ "$SLOW" = no ]; then
  30     [ $smoke_DURATION -le 300 ] || smoke_DURATION=300
  31 fi
  32
  33 lst_TESTS=${lst_TESTS:-"write read ping"}
  34
  35 # "none" -> LST_BRW_CHECK_NONE
  36 # "full" -> LST_BRW_CHECK_FULL
  37 # "simple" -> LST_BRW_CHECK_SIMPLE
  38 lst_CHECK=${lst_CHECK:-"full"}
  39
  40 lst_FROM=${lst_FROM:-"cs"}
  41
  42 case $lst_CHECK in
  43         full|simple) check="check=$lst_CHECK";;
  44         none) check="";;
  45         *) error Unknown flag $lst_CHECK;;
  46 esac
  47
  48 LOAD_MODULES_REMOTE=true load_modules
  49
  50 nodes=$(comma_list "$(osts_nodes) $(mdts_nodes)")
  51 lst_SERVERS=${lst_SERVERS:-$(comma_list "$(host_nids_address $nodes $NETTYPE)")}
  52 lst_CLIENTS=${lst_CLIENTS:-$(comma_list "$(host_nids_address $CLIENTS $NETTYPE)")}
  53 interim_umount=false
  54 interim_umount1=false
  55
  56 #
  57 # _restore_mount(): This function calls restore_mount function for "MOUNT" and
  58 # "MOUNT2" paths to mount clients if they were not mounted and were umounted
  59 # in this file earlier.
  60 # Parameter: None
  61 # Returns: None. Exit with error if client mount fails.
  62 #
  63 _restore_mount () {
  64         if $interim_umount && ! is_mounted $MOUNT; then
  65                 restore_mount $MOUNT || error "Restore $MOUNT failed"
  66         fi
  67
  68         if $interim_umount1 && ! is_mounted $MOUNT2; then
  69                 restore_mount $MOUNT2 || error "Restore $MOUNT2 failed"
  70         fi
  71 }
  72
  73 if local_mode; then
  74    lst_SERVERS=`hostname`
  75    lst_CLIENTS=`hostname`
  76 fi
  77
  78 # FIXME: do we really need to unload lustre modules on all nodes?
  79 # bug 19387, comment 9
  80 # unloading lustre modules is not strictly necessary but unmounting
  81 # /mnt/lustre before running lst would be useful:
  82 # 1) because lustre messages clutter logs - we needn't them for testing LNET
  83 # 2) it's theoretically possible that lst tests congest comm paths so tightly
  84 # that mounted lustre wouldn't able to perform some of its background activities
  85 if is_mounted $MOUNT; then
  86         cleanup_mount $MOUNT || error "Fail to unmount client $MOUNT"
  87         interim_umount=true
  88 fi
  89
  90 if is_mounted $MOUNT2; then
  91         cleanup_mount $MOUNT2 || error "Fail to unmount client $MOUNT2"
  92         interim_umount1=true
  93 fi
  94
  95 lst_prepare () {
  96     # Workaround for bug 15619
  97     lst_cleanup_all
  98     lst_setup_all
  99 }
 100
 101 # make batch
 102 test_smoke_sub () {
 103         local servers=$1
 104         local clients=$2
 105
 106         local nc=$(echo ${clients//,/ } | wc -w)
 107         local ns=$(echo ${servers//,/ } | wc -w)
 108         echo '#!/bin/bash'
 109         echo 'set -e'
 110
 111         echo 'cleanup () { trap 0; echo killing $1 ... ; kill -9 $1 || true; }'
 112
 113         echo "$LST new_session --timeo 100000 hh"
 114         echo "$LST add_group c $(nids_list $clients)"
 115         echo "$LST add_group s $(nids_list $servers)"
 116         echo "$LST add_batch b"
 117
 118         declare -a tests
 119
 120         case $lst_FROM in
 121                 c) tests[0]="${nc}:${ns} --from c --to s";;
 122                 s) tests[0]="${ns}:${nc} --from s --to c";;
 123                 cs)tests[0]="${nc}:${ns} --from c --to s"
 124                    tests[1]="${ns}:${nc} --from s --to c";;
 125                 *) error Unknown flag $lst_FROM;;
 126         esac
 127
 128         pre="$LST add_test --batch b --loop $lst_LOOP "
 129         for t in $lst_TESTS; do
 130                 for s in $lst_SIZES; do
 131                         for c in $lst_CONCR; do
 132                                 for ((i=0; i<${#tests[@]}; i++)); do
 133                                         echo -n "$pre --concurrency $c"\
 134                                                 " --distribute ${tests[i]} "
 135                                         case $t in
 136                                                 read|write)
 137                                                         echo -n "brw $t" \
 138                                                         " $check size=$s";;
 139                                                 ping)
 140                                                         echo -n $t;;
 141                                                 *) error Unknonwn LST test;;
 142                                         esac
 143                                         echo
 144                                 done
 145                         done
 146                 done
 147         done
 148
 149         echo $LST run b
 150         echo sleep 1
 151         echo "$LST stat --delay 10 --timeout 10 c s &"
 152         echo 'pid=$!'
 153         echo 'trap "cleanup $pid" INT TERM'
 154         echo sleep $smoke_DURATION
 155         echo 'cleanup $pid'
 156 }
 157
 158 run_lst () {
 159         local file=$1
 160
 161         export LST_SESSION=$$
 162
 163         # start lst
 164         bash $file
 165 }
 166
 167 check_lst_err () {
 168         local log=$1
 169
 170         grep ^Total $log
 171
 172         if awk '/^Total.*nodes/ {print $2}' $log | grep -vq '^0$'; then
 173                 _restore_mount
 174                 error 'lst Error found'
 175         fi
 176 }
 177
 178 test_smoke () {
 179         lst_prepare
 180
 181         local servers=$lst_SERVERS
 182         local clients=$lst_CLIENTS
 183
 184         local runlst=$TMP/smoke.sh
 185
 186         local log=$TMP/$tfile.log
 187         local rc=0
 188
 189         test_smoke_sub $servers $clients 2>&1 > $runlst
 190
 191         cat $runlst
 192
 193         run_lst $runlst | tee $log
 194         rc=${PIPESTATUS[0]}
 195         [ $rc = 0 ] || { _restore_mount; error "$runlst failed: $rc"; }
 196
 197         lst_end_session --verbose | tee -a $log
 198
 199         # error counters in "lst show_error" should be checked
 200         check_lst_err $log
 201         lst_cleanup_all
 202 }
 203 run_test smoke "lst regression test"
 204
 205 complete $SECONDS
 206 _restore_mount
 207 check_and_cleanup_lustre
 208 exit_status