lustre/tests/lnet-selftest.sh

   1 #!/bin/bash
   2
   3 LUSTRE=${LUSTRE:-$(dirname $0)/..}
   4 . $LUSTRE/tests/test-framework.sh
   5 init_test_env $@
   6 init_logging
   7
   8 ALWAYS_EXCEPT="$LNET_SELFTEST_EXCEPT"
   9 if (( $LINUX_VERSION_CODE >= $(version_code 4.4.0) )); then
  10         # bug number for skipped test: LU-10073
  11         ALWAYS_EXCEPT+="               smoke"
  12 fi
  13
  14 build_test_filter
  15
  16 [ x$LST = x ] && skip_env "lst not found LST=$LST"
  17
  18 # FIXME: what is the reasonable value here?
  19 lst_LOOP=${lst_LOOP:-100000}
  20 lst_CONCR=${lst_CONCR:-"1 2 4 8"}
  21 lst_SIZES=${lst_SIZES:-"4k 8k 256k 1M"}
  22 if [ "$SLOW" = no ]; then
  23     lst_CONCR="1 8"
  24     lst_SIZES="4k 1M"
  25     lst_LOOP=1000
  26 fi
  27
  28 smoke_DURATION=${smoke_DURATION:-1800}
  29 if [ "$SLOW" = no ]; then
  30     [ $smoke_DURATION -le 300 ] || smoke_DURATION=300
  31 fi
  32
  33 nodes=$(comma_list "$(osts_nodes) $(mdts_nodes)")
  34 lst_SERVERS=${lst_SERVERS:-$(comma_list "$(host_nids_address $nodes $NETTYPE)")}
  35 lst_CLIENTS=${lst_CLIENTS:-$(comma_list "$(host_nids_address $CLIENTS $NETTYPE)")}
  36 interim_umount=false
  37 interim_umount1=false
  38
  39 #
  40 # _restore_mount(): This function calls restore_mount function for "MOUNT" and
  41 # "MOUNT2" paths to mount clients if they were not mounted and were umounted
  42 # in this file earlier.
  43 # Parameter: None
  44 # Returns: None. Exit with error if client mount fails.
  45 #
  46 _restore_mount () {
  47         if $interim_umount && ! is_mounted $MOUNT; then
  48                 restore_mount $MOUNT || error "Restore $MOUNT failed"
  49         fi
  50
  51         if $interim_umount1 && ! is_mounted $MOUNT2; then
  52                 restore_mount $MOUNT2 || error "Restore $MOUNT2 failed"
  53         fi
  54 }
  55
  56 if local_mode; then
  57    lst_SERVERS=`hostname`
  58    lst_CLIENTS=`hostname`
  59 fi
  60
  61 # FIXME: do we really need to unload lustre modules on all nodes?
  62 # bug 19387, comment 9
  63 # unloading lustre modules is not strictly necessary but unmounting
  64 # /mnt/lustre before running lst would be useful:
  65 # 1) because lustre messages clutter logs - we needn't them for testing LNET
  66 # 2) it's theoretically possible that lst tests congest comm paths so tightly
  67 # that mounted lustre wouldn't able to perform some of its background activities
  68 if is_mounted $MOUNT; then
  69         cleanup_mount $MOUNT || error "Fail to unmount client $MOUNT"
  70         interim_umount=true
  71 fi
  72
  73 if is_mounted $MOUNT2; then
  74         cleanup_mount $MOUNT2 || error "Fail to unmount client $MOUNT2"
  75         interim_umount1=true
  76 fi
  77
  78 lst_prepare () {
  79     # Workaround for bug 15619
  80     lst_cleanup_all
  81     lst_setup_all
  82 }
  83
  84 # make batch
  85 test_smoke_sub () {
  86     local servers=$1
  87     local clients=$2
  88
  89
  90     local nc=$(echo ${clients//,/ } | wc -w)
  91     local ns=$(echo ${servers//,/ } | wc -w)
  92     echo '#!/bin/bash'
  93     echo 'set -e'
  94
  95     echo 'cleanup () { trap 0; echo killing $1 ... ; kill -9 $1 || true; }'
  96
  97     echo "$LST new_session --timeo 100000 hh"
  98     echo "$LST add_group c $(nids_list $clients)"
  99     echo "$LST add_group s $(nids_list $servers)"
 100     echo "$LST add_batch b"
 101
 102     pre="$LST add_test --batch b --loop $lst_LOOP "
 103     for t in "brw read" "brw write" ; do
 104         for s in $lst_SIZES; do
 105             for c in $lst_CONCR; do
 106                 for d in "${nc}:${ns} --from c --to s" "${ns}:${nc} --from s --to c"; do
 107                     echo -n "$pre"
 108                     echo " --concurrency $c --distribute $d $t check=full size=$s"
 109                  done
 110             done
 111         done
 112     done
 113
 114     for c in $lst_CONCR; do
 115         for d in "${nc}:${ns} --from c --to s" "${ns}:${nc} --from s --to c"; do
 116             echo -n "$pre"
 117             echo " --concurrency $c --distribute $d ping "
 118         done
 119     done
 120
 121     echo $LST run b
 122     echo sleep 1
 123     echo "$LST stat --delay 10 --timeout 10 c s &"
 124     echo 'pid=$!'
 125     echo 'trap "cleanup $pid" INT TERM'
 126     echo sleep $smoke_DURATION
 127     echo 'cleanup $pid'
 128
 129 }
 130
 131 run_lst () {
 132    local file=$1
 133
 134    export LST_SESSION=$$
 135
 136    # start lst
 137    sh $file
 138 }
 139
 140 check_lst_err () {
 141         local log=$1
 142
 143         grep ^Total $log
 144
 145         if awk '/^Total.*nodes/ {print $2}' $log | grep -vq '^0$'; then
 146                 _restore_mount
 147                 error 'lst Error found'
 148         fi
 149 }
 150
 151 test_smoke () {
 152         lst_prepare
 153
 154         local servers=$lst_SERVERS
 155         local clients=$lst_CLIENTS
 156
 157         local runlst=$TMP/smoke.sh
 158
 159         local log=$TMP/$tfile.log
 160         local rc=0
 161
 162         test_smoke_sub $servers $clients 2>&1 > $runlst
 163
 164         cat $runlst
 165
 166         run_lst $runlst | tee $log
 167         rc=${PIPESTATUS[0]}
 168         [ $rc = 0 ] || { _restore_mount; error "$runlst failed: $rc"; }
 169
 170         lst_end_session --verbose | tee -a $log
 171
 172         # error counters in "lst show_error" should be checked
 173         check_lst_err $log
 174         lst_cleanup_all
 175 }
 176 run_test smoke "lst regression test"
 177
 178 complete $SECONDS
 179 _restore_mount
 180 check_and_cleanup_lustre
 181 exit_status