lustre/tests/lnet-selftest.sh

   1 #!/bin/bash
   2
   3 LUSTRE=${LUSTRE:-$(dirname $0)/..}
   4 . $LUSTRE/tests/test-framework.sh
   5 init_test_env $@
   6 init_logging
   7
   8 ALWAYS_EXCEPT="$LNET_SELFTEST_EXCEPT"
   9 if [[ $(uname -m) = aarch64 ]]; then
  10         # bug number for skipped test: LU-10073
  11         ALWAYS_EXCEPT+="               smoke"
  12 fi
  13
  14 # Check if running on Ubuntu client
  15 if [ -r /etc/os-release ]; then
  16         if grep -qi ubuntu /etc/os-release; then
  17                 # bug number for skipped test: LU-10073
  18                 ALWAYS_EXCEPT+="               smoke"
  19         fi
  20 fi
  21
  22 build_test_filter
  23
  24 [ x$LST = x ] && skip_env "lst not found LST=$LST"
  25
  26 # FIXME: what is the reasonable value here?
  27 lst_LOOP=${lst_LOOP:-100000}
  28 lst_CONCR=${lst_CONCR:-"1 2 4 8"}
  29 lst_SIZES=${lst_SIZES:-"4k 8k 256k 1M"}
  30 if [ "$SLOW" = no ]; then
  31     lst_CONCR="1 8"
  32     lst_SIZES="4k 1M"
  33     lst_LOOP=1000
  34 fi
  35
  36 smoke_DURATION=${smoke_DURATION:-1800}
  37 if [ "$SLOW" = no ]; then
  38     [ $smoke_DURATION -le 300 ] || smoke_DURATION=300
  39 fi
  40
  41 nodes=$(comma_list "$(osts_nodes) $(mdts_nodes)")
  42 lst_SERVERS=${lst_SERVERS:-$(comma_list "$(host_nids_address $nodes $NETTYPE)")}
  43 lst_CLIENTS=${lst_CLIENTS:-$(comma_list "$(host_nids_address $CLIENTS $NETTYPE)")}
  44 interim_umount=false
  45 interim_umount1=false
  46
  47 #
  48 # _restore_mount(): This function calls restore_mount function for "MOUNT" and
  49 # "MOUNT2" paths to mount clients if they were not mounted and were umounted
  50 # in this file earlier.
  51 # Parameter: None
  52 # Returns: None. Exit with error if client mount fails.
  53 #
  54 _restore_mount () {
  55         if $interim_umount && ! is_mounted $MOUNT; then
  56                 restore_mount $MOUNT || error "Restore $MOUNT failed"
  57         fi
  58
  59         if $interim_umount1 && ! is_mounted $MOUNT2; then
  60                 restore_mount $MOUNT2 || error "Restore $MOUNT2 failed"
  61         fi
  62 }
  63
  64 is_mounted () {
  65     local mntpt=$1
  66     local mounted=$(mounted_lustre_filesystems)
  67     echo $mounted' ' | grep -w -q $mntpt' '
  68 }
  69
  70 if local_mode; then
  71    lst_SERVERS=`hostname`
  72    lst_CLIENTS=`hostname`
  73 fi
  74
  75 # FIXME: do we really need to unload lustre modules on all nodes?
  76 # bug 19387, comment 9
  77 # unloading lustre modules is not strictly necessary but unmounting
  78 # /mnt/lustre before running lst would be useful:
  79 # 1) because lustre messages clutter logs - we needn't them for testing LNET
  80 # 2) it's theoretically possible that lst tests congest comm paths so tightly
  81 # that mounted lustre wouldn't able to perform some of its background activities
  82 if is_mounted $MOUNT; then
  83         cleanup_mount $MOUNT || error "Fail to unmount client $MOUNT"
  84         interim_umount=true
  85 fi
  86
  87 if is_mounted $MOUNT2; then
  88         cleanup_mount $MOUNT2 || error "Fail to unmount client $MOUNT2"
  89         interim_umount1=true
  90 fi
  91
  92 lst_prepare () {
  93     # Workaround for bug 15619
  94     lst_cleanup_all
  95     lst_setup_all
  96 }
  97
  98 # make batch
  99 test_smoke_sub () {
 100     local servers=$1
 101     local clients=$2
 102
 103
 104     local nc=$(echo ${clients//,/ } | wc -w)
 105     local ns=$(echo ${servers//,/ } | wc -w)
 106     echo '#!/bin/bash'
 107     echo 'set -e'
 108
 109     echo 'cleanup () { trap 0; echo killing $1 ... ; kill -9 $1 || true; }'
 110
 111     echo "$LST new_session --timeo 100000 hh"
 112     echo "$LST add_group c $(nids_list $clients)"
 113     echo "$LST add_group s $(nids_list $servers)"
 114     echo "$LST add_batch b"
 115
 116     pre="$LST add_test --batch b --loop $lst_LOOP "
 117     for t in "brw read" "brw write" ; do
 118         for s in $lst_SIZES; do
 119             for c in $lst_CONCR; do
 120                 for d in "${nc}:${ns} --from c --to s" "${ns}:${nc} --from s --to c"; do
 121                     echo -n "$pre"
 122                     echo " --concurrency $c --distribute $d $t check=full size=$s"
 123                  done
 124             done
 125         done
 126     done
 127
 128     for c in $lst_CONCR; do
 129         for d in "${nc}:${ns} --from c --to s" "${ns}:${nc} --from s --to c"; do
 130             echo -n "$pre"
 131             echo " --concurrency $c --distribute $d ping "
 132         done
 133     done
 134
 135     echo $LST run b
 136     echo sleep 1
 137     echo "$LST stat --delay 10 --timeout 10 c s &"
 138     echo 'pid=$!'
 139     echo 'trap "cleanup $pid" INT TERM'
 140     echo sleep $smoke_DURATION
 141     echo 'cleanup $pid'
 142
 143 }
 144
 145 run_lst () {
 146    local file=$1
 147
 148    export LST_SESSION=$$
 149
 150    # start lst
 151    sh $file
 152 }
 153
 154 check_lst_err () {
 155         local log=$1
 156
 157         grep ^Total $log
 158
 159         if awk '/^Total.*nodes/ {print $2}' $log | grep -vq '^0$'; then
 160                 _restore_mount
 161                 error 'lst Error found'
 162         fi
 163 }
 164
 165 test_smoke () {
 166         lst_prepare
 167
 168         local servers=$lst_SERVERS
 169         local clients=$lst_CLIENTS
 170
 171         local runlst=$TMP/smoke.sh
 172
 173         local log=$TMP/$tfile.log
 174         local rc=0
 175
 176         test_smoke_sub $servers $clients 2>&1 > $runlst
 177
 178         cat $runlst
 179
 180         run_lst $runlst | tee $log
 181         rc=${PIPESTATUS[0]}
 182         [ $rc = 0 ] || { _restore_mount; error "$runlst failed: $rc"; }
 183
 184         lst_end_session --verbose | tee -a $log
 185
 186         # error counters in "lst show_error" should be checked
 187         check_lst_err $log
 188         lst_cleanup_all
 189 }
 190 run_test smoke "lst regression test"
 191
 192 complete $SECONDS
 193 _restore_mount
 194 check_and_cleanup_lustre
 195 exit_status