lustre/tests/lnet-selftest.sh

   1 #!/bin/bash
   2
   3 LUSTRE=${LUSTRE:-$(dirname $0)/..}
   4 . $LUSTRE/tests/test-framework.sh
   5 init_test_env $@
   6 init_logging
   7
   8 ALWAYS_EXCEPT="$LNET_SELFTEST_EXCEPT"
   9 if (( $LINUX_VERSION_CODE >= $(version_code 4.4.0) )); then
  10         # bug number for skipped test: LU-10073
  11         ALWAYS_EXCEPT+="               smoke "
  12 fi
  13
  14 if [[ $(uname -m) = ppc64 ]]; then
  15         # bug number for skipped test: LU-10073
  16         ALWAYS_EXCEPT+="               smoke "
  17 fi
  18
  19 build_test_filter
  20
  21 [ x$LST = x ] && skip_env "lst not found LST=$LST"
  22
  23 # FIXME: what is the reasonable value here?
  24 lst_LOOP=${lst_LOOP:-100000}
  25 lst_CONCR=${lst_CONCR:-"1 2 4 8"}
  26 lst_SIZES=${lst_SIZES:-"4k 8k 256k 1M"}
  27 if [ "$SLOW" = no ]; then
  28     lst_CONCR="1 8"
  29     lst_SIZES="4k 1M"
  30     lst_LOOP=1000
  31 fi
  32
  33 smoke_DURATION=${smoke_DURATION:-1800}
  34 if [ "$SLOW" = no ]; then
  35     [ $smoke_DURATION -le 300 ] || smoke_DURATION=300
  36 fi
  37
  38 lst_TESTS=${lst_TESTS:-"write read ping"}
  39
  40 # "none" -> LST_BRW_CHECK_NONE
  41 # "full" -> LST_BRW_CHECK_FULL
  42 # "simple" -> LST_BRW_CHECK_SIMPLE
  43 lst_CHECK=${lst_CHECK:-"full"}
  44
  45 lst_FROM=${lst_FROM:-"cs"}
  46
  47 case $lst_CHECK in
  48         full|simple) check="check=$lst_CHECK";;
  49         none) check="";;
  50         *) error Unknown flag $lst_CHECK;;
  51 esac
  52
  53 LOAD_MODULES_REMOTE=true load_modules
  54
  55 nodes=$(comma_list "$(osts_nodes) $(mdts_nodes)")
  56 lst_SERVERS=${lst_SERVERS:-$(comma_list "$(host_nids_address $nodes $NETTYPE)")}
  57 lst_CLIENTS=${lst_CLIENTS:-$(comma_list "$(host_nids_address $CLIENTS $NETTYPE)")}
  58 interim_umount=false
  59 interim_umount1=false
  60
  61 #
  62 # _restore_mount(): This function calls restore_mount function for "MOUNT" and
  63 # "MOUNT2" paths to mount clients if they were not mounted and were umounted
  64 # in this file earlier.
  65 # Parameter: None
  66 # Returns: None. Exit with error if client mount fails.
  67 #
  68 _restore_mount () {
  69         if $interim_umount && ! is_mounted $MOUNT; then
  70                 restore_mount $MOUNT || error "Restore $MOUNT failed"
  71         fi
  72
  73         if $interim_umount1 && ! is_mounted $MOUNT2; then
  74                 restore_mount $MOUNT2 || error "Restore $MOUNT2 failed"
  75         fi
  76 }
  77
  78 if local_mode; then
  79    lst_SERVERS=`hostname`
  80    lst_CLIENTS=`hostname`
  81 fi
  82
  83 # FIXME: do we really need to unload lustre modules on all nodes?
  84 # bug 19387, comment 9
  85 # unloading lustre modules is not strictly necessary but unmounting
  86 # /mnt/lustre before running lst would be useful:
  87 # 1) because lustre messages clutter logs - we needn't them for testing LNET
  88 # 2) it's theoretically possible that lst tests congest comm paths so tightly
  89 # that mounted lustre wouldn't able to perform some of its background activities
  90 if is_mounted $MOUNT; then
  91         cleanup_mount $MOUNT || error "Fail to unmount client $MOUNT"
  92         interim_umount=true
  93 fi
  94
  95 if is_mounted $MOUNT2; then
  96         cleanup_mount $MOUNT2 || error "Fail to unmount client $MOUNT2"
  97         interim_umount1=true
  98 fi
  99
 100 lst_prepare () {
 101     # Workaround for bug 15619
 102     lst_cleanup_all
 103     lst_setup_all
 104 }
 105
 106 # make batch
 107 test_smoke_sub () {
 108     local servers=$1
 109     local clients=$2
 110
 111     local nc=$(echo ${clients//,/ } | wc -w)
 112     local ns=$(echo ${servers//,/ } | wc -w)
 113     echo '#!/bin/bash'
 114     echo 'set -e'
 115
 116     echo 'cleanup () { trap 0; echo killing $1 ... ; kill -9 $1 || true; }'
 117
 118     echo "$LST new_session --timeo 100000 hh"
 119     echo "$LST add_group c $(nids_list $clients)"
 120     echo "$LST add_group s $(nids_list $servers)"
 121     echo "$LST add_batch b"
 122
 123         declare -a tests
 124
 125         case $lst_FROM in
 126                 c) tests[0]="${nc}:${ns} --from c --to s";;
 127                 s) tests[0]="${ns}:${nc} --from s --to c";;
 128                 cs)tests[0]="${nc}:${ns} --from c --to s"
 129                    tests[1]="${ns}:${nc} --from s --to c";;
 130                 *) error Unknown flag $lst_FROM;;
 131         esac
 132
 133         pre="$LST add_test --batch b --loop $lst_LOOP "
 134         for t in $lst_TESTS; do
 135                 for s in $lst_SIZES; do
 136                         for c in $lst_CONCR; do
 137                                 for ((i=0; i<${#tests[@]}; i++)); do
 138                                         echo -n "$pre --concurrency $c"\
 139                                                 " --distribute ${tests[i]} "
 140                                         case $t in
 141                                                 read|write)
 142                                                         echo -n "brw $t" \
 143                                                         " $check size=$s";;
 144                                                 ping)
 145                                                         echo -n $t;;
 146                                                 *) error Unknonwn LST test;;
 147                                         esac
 148                                         echo
 149                                 done
 150                         done
 151                 done
 152         done
 153
 154     echo $LST run b
 155     echo sleep 1
 156     echo "$LST stat --delay 10 --timeout 10 c s &"
 157     echo 'pid=$!'
 158     echo 'trap "cleanup $pid" INT TERM'
 159     echo sleep $smoke_DURATION
 160     echo 'cleanup $pid'
 161
 162 }
 163
 164 run_lst () {
 165    local file=$1
 166
 167    export LST_SESSION=$$
 168
 169    # start lst
 170    sh $file
 171 }
 172
 173 check_lst_err () {
 174         local log=$1
 175
 176         grep ^Total $log
 177
 178         if awk '/^Total.*nodes/ {print $2}' $log | grep -vq '^0$'; then
 179                 _restore_mount
 180                 error 'lst Error found'
 181         fi
 182 }
 183
 184 test_smoke () {
 185         lst_prepare
 186
 187         local servers=$lst_SERVERS
 188         local clients=$lst_CLIENTS
 189
 190         local runlst=$TMP/smoke.sh
 191
 192         local log=$TMP/$tfile.log
 193         local rc=0
 194
 195         test_smoke_sub $servers $clients 2>&1 > $runlst
 196
 197         cat $runlst
 198
 199         run_lst $runlst | tee $log
 200         rc=${PIPESTATUS[0]}
 201         [ $rc = 0 ] || { _restore_mount; error "$runlst failed: $rc"; }
 202
 203         lst_end_session --verbose | tee -a $log
 204
 205         # error counters in "lst show_error" should be checked
 206         check_lst_err $log
 207         lst_cleanup_all
 208 }
 209 run_test smoke "lst regression test"
 210
 211 complete $SECONDS
 212 _restore_mount
 213 check_and_cleanup_lustre
 214 exit_status