lustre/tests/lnet-selftest.sh

   1 #!/bin/bash
   2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
   3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
   4
   5 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
   6 . $LUSTRE/tests/test-framework.sh
   7 init_test_env $@
   8 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
   9 init_logging
  10
  11 #
  12 ALWAYS_EXCEPT="$ALWAYS_EXCEPT $LNET_SELFTEST_EXCEPT"
  13
  14 [ x$LST = x ] && { skip_env "lst not found LST=$LST" && exit 0; }
  15
  16 # FIXME: what is the reasonable value here?
  17 lst_LOOP=${lst_LOOP:-100000}
  18 lst_CONCR=${lst_CONCR:-"1 2 4 8"}
  19 lst_SIZES=${lst_SIZES:-"4k 8k 256k 1M"}
  20 if [ "$SLOW" = no ]; then
  21     lst_CONCR="1 8"
  22     lst_SIZES="4k 1M"
  23     lst_LOOP=1000
  24 fi
  25
  26 smoke_DURATION=${smoke_DURATION:-1800}
  27 if [ "$SLOW" = no ]; then
  28     [ $smoke_DURATION -le 300 ] || smoke_DURATION=300
  29 fi
  30
  31 nodes=$(comma_list "$(osts_nodes) $(mdts_nodes)")
  32 lst_SERVERS=${lst_SERVERS:-$(comma_list "$(host_nids_address $nodes $NETTYPE)")}
  33 lst_CLIENTS=${lst_CLIENTS:-$(comma_list "$(host_nids_address $CLIENTS $NETTYPE)")}
  34 interim_umount=false
  35 interim_umount1=false
  36
  37 #
  38 # _restore_mount(): This function calls restore_mount function for "MOUNT" and
  39 # "MOUNT2" paths to mount clients if they were not mounted and were umounted
  40 # in this file earlier.
  41 # Parameter: None
  42 # Returns: None. Exit with error if client mount fails.
  43 #
  44 _restore_mount () {
  45         if $interim_umount && ! is_mounted $MOUNT; then
  46                 restore_mount $MOUNT || error "Restore $MOUNT failed"
  47         fi
  48
  49         if $interim_umount1 && ! is_mounted $MOUNT2; then
  50                 restore_mount $MOUNT2 || error "Restore $MOUNT2 failed"
  51         fi
  52 }
  53
  54 is_mounted () {
  55     local mntpt=$1
  56     local mounted=$(mounted_lustre_filesystems)
  57     echo $mounted' ' | grep -w -q $mntpt' '
  58 }
  59
  60 if local_mode; then
  61    lst_SERVERS=`hostname`
  62    lst_CLIENTS=`hostname`
  63 fi
  64
  65 # FIXME: do we really need to unload lustre modules on all nodes?
  66 # bug 19387, comment 9
  67 # unloading lustre modules is not strictly necessary but unmounting
  68 # /mnt/lustre before running lst would be useful:
  69 # 1) because lustre messages clutter logs - we needn't them for testing LNET
  70 # 2) it's theoretically possible that lst tests congest comm paths so tightly
  71 # that mounted lustre wouldn't able to perform some of its background activities
  72 if is_mounted $MOUNT; then
  73         cleanup_mount $MOUNT || error "Fail to unmount client $MOUNT"
  74         interim_umount=true
  75 fi
  76
  77 if is_mounted $MOUNT2; then
  78         cleanup_mount $MOUNT2 || error "Fail to unmount client $MOUNT2"
  79         interim_umount1=true
  80 fi
  81
  82 build_test_filter
  83
  84 lst_prepare () {
  85     # Workaround for bug 15619
  86     lst_cleanup_all
  87     lst_setup_all
  88 }
  89
  90 # make batch
  91 test_smoke_sub () {
  92     local servers=$1
  93     local clients=$2
  94
  95
  96     local nc=$(echo ${clients//,/ } | wc -w)
  97     local ns=$(echo ${servers//,/ } | wc -w)
  98     echo '#!/bin/bash'
  99     echo 'set -e'
 100
 101     echo 'cleanup () { trap 0; echo killing $1 ... ; kill -9 $1 || true; }'
 102
 103     echo "$LST new_session --timeo 100000 hh"
 104     echo "$LST add_group c $(nids_list $clients)"
 105     echo "$LST add_group s $(nids_list $servers)"
 106     echo "$LST add_batch b"
 107
 108     pre="$LST add_test --batch b --loop $lst_LOOP "
 109     for t in "brw read" "brw write" ; do
 110         for s in $lst_SIZES; do
 111             for c in $lst_CONCR; do
 112                 for d in "${nc}:${ns} --from c --to s" "${ns}:${nc} --from s --to c"; do
 113                     echo -n "$pre"
 114                     echo " --concurrency $c --distribute $d $t check=full size=$s"
 115                  done
 116             done
 117         done
 118     done
 119
 120     for c in $lst_CONCR; do
 121         for d in "${nc}:${ns} --from c --to s" "${ns}:${nc} --from s --to c"; do
 122             echo -n "$pre"
 123             echo " --concurrency $c --distribute $d ping "
 124         done
 125     done
 126
 127     echo $LST run b
 128     echo sleep 1
 129     echo "$LST stat --delay 10 --timeout 10 c s &"
 130     echo 'pid=$!'
 131     echo 'trap "cleanup $pid" INT TERM'
 132     echo sleep $smoke_DURATION
 133     echo 'cleanup $pid'
 134
 135 }
 136
 137 run_lst () {
 138    local file=$1
 139
 140    export LST_SESSION=$$
 141
 142    # start lst
 143    sh $file
 144 }
 145
 146 check_lst_err () {
 147         local log=$1
 148
 149         grep ^Total $log
 150
 151         if awk '/^Total.*nodes/ {print $2}' $log | grep -vq '^0$'; then
 152                 _restore_mount
 153                 error 'lst Error found'
 154         fi
 155 }
 156
 157 test_smoke () {
 158         lst_prepare
 159
 160         local servers=$lst_SERVERS
 161         local clients=$lst_CLIENTS
 162
 163         local runlst=$TMP/smoke.sh
 164
 165         local log=$TMP/$tfile.log
 166         local rc=0
 167
 168         test_smoke_sub $servers $clients 2>&1 > $runlst
 169
 170         cat $runlst
 171
 172         run_lst $runlst | tee $log
 173         rc=${PIPESTATUS[0]}
 174         [ $rc = 0 ] || { _restore_mount; error "$runlst failed: $rc"; }
 175
 176         lst_end_session --verbose | tee -a $log
 177
 178         # error counters in "lst show_error" should be checked
 179         check_lst_err $log
 180         lst_cleanup_all
 181 }
 182 run_test smoke "lst regression test"
 183
 184 complete $SECONDS
 185 _restore_mount
 186 exit_status