lustre/tests/lnet-selftest.sh

   1 #!/bin/bash
   2
   3 LUSTRE=${LUSTRE:-$(dirname $0)/..}
   4 . $LUSTRE/tests/test-framework.sh
   5 init_test_env "$@"
   6 init_logging
   7
   8 ALWAYS_EXCEPT="$LNET_SELFTEST_EXCEPT"
   9
  10 build_test_filter
  11
  12 [ x$LST = x ] && skip_env "lst not found LST=$LST"
  13
  14 # FIXME: what is the reasonable value here?
  15 lst_LOOP=${lst_LOOP:-100000}
  16 lst_CONCR=${lst_CONCR:-"1 2 4 8"}
  17 lst_SIZES=${lst_SIZES:-"4k 8k 256k 1M"}
  18 if [ "$SLOW" = no ]; then
  19         lst_CONCR="1 8"
  20         lst_SIZES="4k 1M"
  21         lst_LOOP=1000
  22 fi
  23
  24 smoke_DURATION=${smoke_DURATION:-1800}
  25 if [ "$SLOW" = no ]; then
  26         [ $smoke_DURATION -le 300 ] || smoke_DURATION=300
  27 fi
  28
  29 lst_TESTS=${lst_TESTS:-"write read ping"}
  30
  31 # "none" -> LST_BRW_CHECK_NONE
  32 # "full" -> LST_BRW_CHECK_FULL
  33 # "simple" -> LST_BRW_CHECK_SIMPLE
  34 lst_CHECK=${lst_CHECK:-"full"}
  35
  36 lst_FROM=${lst_FROM:-"cs"}
  37
  38 case $lst_CHECK in
  39         full|simple) check="check=$lst_CHECK";;
  40         none) check="";;
  41         *) error Unknown flag $lst_CHECK;;
  42 esac
  43
  44 LOAD_MODULES_REMOTE=true load_modules
  45
  46 nodes=$(comma_list "$(osts_nodes) $(mdts_nodes)")
  47 lst_SERVERS=${lst_SERVERS:-$(comma_list "$(host_nids_address $nodes $NETTYPE)")}
  48 lst_CLIENTS=${lst_CLIENTS:-$(comma_list "$(host_nids_address $CLIENTS $NETTYPE)")}
  49 interim_umount=false
  50 interim_umount1=false
  51
  52 #
  53 # _restore_mount(): This function calls restore_mount function for "MOUNT" and
  54 # "MOUNT2" paths to mount clients if they were not mounted and were umounted
  55 # in this file earlier.
  56 # Parameter: None
  57 # Returns: None. Exit with error if client mount fails.
  58 #
  59 _restore_mount () {
  60         if $interim_umount && ! is_mounted $MOUNT; then
  61                 restore_mount $MOUNT || error "Restore $MOUNT failed"
  62         fi
  63
  64         if $interim_umount1 && ! is_mounted $MOUNT2; then
  65                 restore_mount $MOUNT2 || error "Restore $MOUNT2 failed"
  66         fi
  67 }
  68
  69 if local_mode; then
  70    lst_SERVERS=`hostname`
  71    lst_CLIENTS=`hostname`
  72 fi
  73
  74 # FIXME: do we really need to unload lustre modules on all nodes?
  75 # bug 19387, comment 9
  76 # unloading lustre modules is not strictly necessary but unmounting
  77 # /mnt/lustre before running lst would be useful:
  78 # 1) because lustre messages clutter logs - we needn't them for testing LNET
  79 # 2) it's theoretically possible that lst tests congest comm paths so tightly
  80 # that mounted lustre wouldn't able to perform some of its background activities
  81 if is_mounted $MOUNT; then
  82         cleanup_mount $MOUNT || error "Fail to unmount client $MOUNT"
  83         interim_umount=true
  84 fi
  85
  86 if is_mounted $MOUNT2; then
  87         cleanup_mount $MOUNT2 || error "Fail to unmount client $MOUNT2"
  88         interim_umount1=true
  89 fi
  90
  91 lst_prepare () {
  92         # Workaround for bug 15619
  93         lst_cleanup_all
  94         lst_setup_all
  95 }
  96
  97 # make batch
  98 test_smoke_sub () {
  99         local servers=$1
 100         local clients=$2
 101
 102         local nc=$(echo ${clients//,/ } | wc -w)
 103         local ns=$(echo ${servers//,/ } | wc -w)
 104         echo '#!/bin/bash'
 105         echo 'set -e'
 106
 107         echo 'cleanup () { trap 0; echo killing $1 ... ; kill -9 $1 || true; }'
 108
 109         echo "$LST new_session --timeo 100000 hh"
 110         echo "$LST add_group c $(nids_list $clients)"
 111         echo "$LST add_group s $(nids_list $servers)"
 112         echo "$LST add_batch b"
 113
 114         declare -a tests
 115
 116         case $lst_FROM in
 117                 c) tests[0]="${nc}:${ns} --from c --to s";;
 118                 s) tests[0]="${ns}:${nc} --from s --to c";;
 119                 cs)tests[0]="${nc}:${ns} --from c --to s"
 120                    tests[1]="${ns}:${nc} --from s --to c";;
 121                 *) error Unknown flag $lst_FROM;;
 122         esac
 123
 124         pre="$LST add_test --batch b --loop $lst_LOOP "
 125         for t in $lst_TESTS; do
 126                 for s in $lst_SIZES; do
 127                         for c in $lst_CONCR; do
 128                                 for ((i=0; i<${#tests[@]}; i++)); do
 129                                         echo -n "$pre --concurrency $c"\
 130                                                 " --distribute ${tests[i]} "
 131                                         case $t in
 132                                                 read|write)
 133                                                         echo -n "brw $t" \
 134                                                         " $check size=$s";;
 135                                                 ping)
 136                                                         echo -n $t;;
 137                                                 *) error Unknonwn LST test;;
 138                                         esac
 139                                         echo
 140                                 done
 141                         done
 142                 done
 143         done
 144
 145         echo $LST run b
 146         echo sleep 1
 147         echo "$LST stat --delay 10 --timeout 10 c s &"
 148         echo 'pid=$!'
 149         echo 'trap "cleanup $pid" INT TERM'
 150         echo sleep $smoke_DURATION
 151         echo 'cleanup $pid'
 152 }
 153
 154 run_lst () {
 155         local file=$1
 156
 157         export LST_SESSION=$$
 158
 159         # start lst
 160         bash $file
 161 }
 162
 163 check_lst_err () {
 164         local log=$1
 165
 166         grep ^Total $log
 167
 168         if awk '/^Total.*nodes/ {print $2}' $log | grep -vq '^0$'; then
 169                 _restore_mount
 170                 error 'lst Error found'
 171         fi
 172 }
 173
 174 test_smoke () {
 175         lst_prepare
 176
 177         local servers=$lst_SERVERS
 178         local clients=$lst_CLIENTS
 179
 180         local runlst=$TMP/smoke.sh
 181
 182         local log=$TMP/$tfile.log
 183         local rc=0
 184
 185         test_smoke_sub $servers $clients 2>&1 > $runlst
 186
 187         cat $runlst
 188
 189         run_lst $runlst | tee $log
 190         rc=${PIPESTATUS[0]}
 191         [ $rc = 0 ] || { _restore_mount; error "$runlst failed: $rc"; }
 192
 193         lst_end_session --verbose | tee -a $log
 194
 195         # error counters in "lst show_error" should be checked
 196         check_lst_err $log
 197         lst_cleanup_all
 198 }
 199 run_test smoke "lst regression test"
 200
 201 complete_test $SECONDS
 202 _restore_mount
 203 check_and_cleanup_lustre
 204 exit_status