2 # vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
9 export GMNALNID=${GMNALNID:-/usr/sbin/gmlndnid}
11 # e.g.: assert_env LUSTRE MDSNODES OSTNODES CLIENTS
15 if [ -z "${!name}" ]; then
16 echo "$0: $name must be set"
20 [ $failed ] && exit 1 || true
24 echo "usage: $0 [-r] [-f cfgfile]"
31 export LUSTRE=`absolute_path $LUSTRE`
32 export TESTSUITE=`basename $0 .sh`
33 export XMLCONFIG=${XMLCONFIG:-${TESTSUITE}.xml}
34 export LTESTDIR=${LTESTDIR:-$LUSTRE/../ltest}
36 [ -d /r ] && export ROOT=${ROOT:-/r}
37 export TMP=${TMP:-$ROOT/tmp}
39 export PATH=:$PATH:$LUSTRE/utils:$LUSTRE/tests
40 export LCTL=${LCTL:-"$LUSTRE/utils/lctl"}
41 export MKFS=${MKFS:-"$LUSTRE/utils/mkfs.lustre"}
42 export CHECKSTAT="${CHECKSTAT:-checkstat} "
# Default FSTYPE to "ext3" when it is unset or empty.  The variable name was
# previously misspelled (FSYTPE), so the default never reached FSTYPE itself.
export FSTYPE=${FSTYPE:-"ext3"}
45 if [ "$ACCEPTOR_PORT" ]; then
46 export PORT_OPT="--port $ACCEPTOR_PORT"
49 # Paths on remote nodes, if different
50 export RLUSTRE=${RLUSTRE:-$LUSTRE}
51 export RPWD=${RPWD:-$PWD}
55 while getopts "rvf:" opt $*; do
58 r) REFORMAT=--reformat;;
67 # save the name of the config file for the upcall
68 echo "XMLCONFIG=$LUSTRE/tests/$XMLCONFIG" > $LUSTRE/tests/XMLCONFIG
69 # echo "CONFIG=`canonical_path $CONFIG`" > $LUSTRE/tests/CONFIG
73 # usage: start <facet> <device> [mount options...]
79 echo "Starting ${device} as /mnt/${facet} (opts:$@)"
80 mkdir -p /mnt/${facet}
81 do_facet ${facet} mount -t lustre $@ ${device} /mnt/${facet}
82 #do_facet $facet $LCONF --select ${facet}_svc=${active}_facet \
83 # --node ${active}_facet --ptldebug $PTLDEBUG --subsystem $SUBSYSTEM \
86 if [ $RC -ne 0 ]; then
87 echo mount -t lustre $@ ${device} /mnt/${facet}
88 echo Start of ${device} on ${facet} failed ${RC}
90 label=`do_facet ${facet} e2label ${device}`
91 eval export ${facet}_svc=${label}
92 eval export ${facet}_dev=${device}
93 eval export ${facet}_opt=\"$@\"
102 local running=`do_facet ${facet} mount | grep -c /mnt/${facet}" "`
103 if [ $running -ne 0 ]; then
104 echo "Stopping /mnt/${facet} (opts:$@)"
105 do_facet ${facet} umount -d $@ /mnt/${facet}
107 #do_facet $facet $LCONF --select ${facet}_svc=${active}_facet \
108 # --node ${active}_facet --ptldebug $PTLDEBUG --subsystem $SUBSYSTEM \
109 # $@ --cleanup $XMLCONFIG
118 do_node $client mkdir $mnt 2> /dev/null || :
120 # Only supply -o to mount if we have options
121 if [ -n "$MOUNTOPT" ]; then
122 OPTIONS="-o $MOUNTOPT"
125 do_node $client mount -t lustre $OPTIONS \
126 `facet_nid mgs`:/lustre $mnt || return 1
128 do_node $client "sysctl -w lnet.debug=$PTLDEBUG; sysctl -w lnet.subsystem_debug=${SUBSYSTEM# }"
130 [ -d /r ] && $LCTL modules > /r/tmp/ogdb-`hostname`
138 do_node $client umount $force $mnt
143 if [ "$FAILURE_MODE" = HARD ]; then
144 $POWER_DOWN `facet_active_host $facet`
146 elif [ "$FAILURE_MODE" = SOFT ]; then
153 if [ "$FAILURE_MODE" = HARD ]; then
154 $POWER_UP `facet_active_host $facet`
162 check_network "$HOST" 900
163 while ! do_node $HOST "ls -d $LUSTRE " > /dev/null; do sleep 5; done
168 HOST=`facet_active_host $facet`
173 # not every config has many clients
174 if [ ! -z "$CLIENTS" ]; then
175 $PDSH $CLIENTS "df $MOUNT" > /dev/null
180 uname -n >> $MOUNT/recon
181 if [ ! -z "$CLIENTS" ]; then
182 $PDSH $CLIENTS "df $MOUNT; uname -n >> $MOUNT/recon" > /dev/null
184 echo Connected clients:
186 ls -l $MOUNT/recon > /dev/null
192 echo "Failing $facet on node `facet_active_host $facet`"
193 shutdown_facet $facet
197 echo "df pid is $DFPID"
199 TO=`facet_active_host $facet`
200 echo "Failover $facet to $TO"
202 local dev=${facet}_dev
203 local opt=${facet}_opt
204 start $facet ${!dev} ${!opt}
215 local svc=${facet}_svc
216 do_facet $facet $LCTL --device %${!svc} readonly
217 do_facet $facet $LCTL --device %${!svc} notransno
218 do_facet $facet $LCTL mark "$facet REPLAY BARRIER on ${!svc}"
219 $LCTL mark "local REPLAY BARRIER on ${!svc}"
222 replay_barrier_nodf() {
225 local svc=${facet}_svc
226 echo Replay barrier on ${!svc}
227 do_facet $facet $LCTL --device %${!svc} readonly
228 do_facet $facet $LCTL --device %${!svc} notransno
229 do_facet $facet $LCTL mark "$facet REPLAY BARRIER on ${!svc}"
230 $LCTL mark "local REPLAY BARRIER on ${!svc}"
234 UUID=`cat /proc/fs/lustre/mdc/*_MNT_*/uuid`
235 do_facet mds "echo $UUID > /proc/fs/lustre/mds/${mds_svc}/evict_client"
240 df $MOUNT || error "post-failover df: $?"
247 local svc=${facet}_svc
248 local dev=${facet}_dev
249 local opt=${facet}_opt
250 start $facet ${!dev} ${!opt}
251 do_facet $facet lctl --device %${!svc} abort_recovery
252 df $MOUNT || echo "first df failed: $?"
254 df $MOUNT || error "post-failover df: $?"
258 echo There is no lmc. This is mountconf, baby.
263 if [ "$1" = "client" -o "$1" = "'*'" ]; then echo \'*\'; else
264 ID=`$PDSH $1 $GMNALNID -l | cut -d\ -f2`
270 if [ "$1" = "client" -o "$1" = "'*'" ]; then echo \'*\'; else
277 if [ "$1" = "client" -o "$1" = "'*'" ]; then echo \'*\'; else
278 ID=`echo $1 | sed 's/[^0-9]*//g'`
285 if [ "$1" = "client" -o "$1" = "'*'" ]; then echo \'*\'; else
286 ID=`echo $1 | sed 's/[^0-9]*//g'`
294 varname=${facet}_HOST
300 HOST=`facet_host $facet`
301 if [ -z "$HOST" ]; then
302 echo "The env variable ${facet}_HOST must be set."
305 if [ -z "$NETTYPE" ]; then
306 echo "The env variable NETTYPE must be set."
309 echo `h2$NETTYPE $HOST`
314 local activevar=${facet}active
316 if [ -f ./${facet}active ] ; then
317 source ./${facet}active
321 if [ -z "$active" ] ; then
328 facet_active_host() {
330 local active=`facet_active $facet`
331 if [ "$facet" == client ]; then
334 echo `facet_host $active`
340 failover=${facet}failover
341 host=`facet_host $failover`
342 [ -z "$host" ] && return
343 curactive=`facet_active $facet`
344 if [ -z "${curactive}" -o "$curactive" == "$failover" ] ; then
345 eval export ${facet}active=$facet
347 eval export ${facet}active=$failover
349 # save the active host for this facet
350 activevar=${facet}active
351 echo "$activevar=${!activevar}" > ./$activevar
358 if [ "$HOST" = "$(hostname)" ]; then
363 $myPDSH $HOST $LCTL mark "$@" > /dev/null 2>&1 || :
365 $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; sh -c \"$@\")"
371 HOST=`facet_active_host $facet`
378 echo "add facet $facet: `facet_host $facet`"
379 do_lmc --add node --node ${facet}_facet $@ --timeout $TIMEOUT \
380 --lustre_upcall $UPCALL --ptldebug $PTLDEBUG --subsystem $SUBSYSTEM
381 do_lmc --add net --node ${facet}_facet --nid `facet_nid $facet` \
382 --nettype lnet $PORT_OPT
399 [ "x$CLIENTOPT" != "x" ] && MOUNT_OPTS="--clientoptions $CLIENTOPT"
400 add_facet $facet --lustre_upcall $UPCALL
401 do_lmc --add mtpt --node ${facet}_facet --mds ${mds}_svc $* $MOUNT_OPTS
412 while [ $NETWORK -eq 0 ]; do
413 ping -c 1 -w 3 $1 > /dev/null
414 if [ $? -eq 0 ]; then
418 echo "waiting for $1, $((MAX - WAIT)) secs left"
421 if [ $WAIT -gt $MAX ]; then
422 echo "Network not available"
428 while( !($DSH2 $1 "netstat -tna | grep -q $2") ) ; do
439 # the sed converts spaces to commas, but leaves the last space
440 # alone, so the line doesn't end with a comma.
441 echo "$*" | tr -s " " "\n" | sort -b -u | tr "\n" " " | sed 's/ \([^$]\)/,\1/g'
445 (cd `dirname $1`; echo $PWD/`basename $1`)
448 ##################################
452 # OBD_FAIL_MDS_ALL_REQUEST_NET
454 do_facet mds sysctl -w lustre.fail_loc=0x123
455 do_facet client "$1" || RC=$?
456 do_facet mds sysctl -w lustre.fail_loc=0
461 # OBD_FAIL_MDS_ALL_REPLY_NET
463 do_facet mds sysctl -w lustre.fail_loc=0x122
464 do_facet client "$@" || RC=$?
465 do_facet mds sysctl -w lustre.fail_loc=0
470 # OBD_FAIL_MDS_REINT_NET_REP
472 do_facet mds sysctl -w lustre.fail_loc=0x119
473 do_facet client "$@" || RC=$?
474 do_facet mds sysctl -w lustre.fail_loc=0
479 #define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214
481 do_facet ost sysctl -w lustre.fail_loc=0x214
482 do_facet client "$1" || RC=$?
483 do_facet client "sync"
484 do_facet ost sysctl -w lustre.fail_loc=0
489 #define OBD_FAIL_LDLM_CANCEL 0x304
491 do_facet client sysctl -w lustre.fail_loc=0x304
492 do_facet client "$@" || RC=$?
493 do_facet client sysctl -w lustre.fail_loc=0
498 #define OBD_FAIL_LDLM_BL_CALLBACK 0x305
500 do_facet client sysctl -w lustre.fail_loc=0x305
501 do_facet client "$@" || RC=$?
502 do_facet client sysctl -w lustre.fail_loc=0
510 echo "clearing fail_loc on $facet"
511 do_facet $facet "sysctl -w lustre.fail_loc=0"
515 $LCTL mark "cancel_lru_locks start"
516 for d in /proc/fs/lustre/ldlm/namespaces/$1*; do
517 if [ -f $d/lru_size ]; then
518 echo clear > $d/lru_size
519 grep "[0-9]" $d/lock_unused_count
522 $LCTL mark "cancel_lru_locks stop"
527 for a in /proc/fs/lustre/llite/*/dump_page_cache; do
528 if [ `wc -l $a | awk '{print $1}'` -gt 1 ]; then
529 echo there is still data in page cache $a ?
537 ##################################
540 sysctl -w lustre.fail_loc=0 || true
541 echo "${TESTSUITE}: **** FAIL:" $@
546 build_test_filter() {
547 [ "$ONLY" ] && log "only running test `echo $ONLY`"
551 [ "$EXCEPT$ALWAYS_EXCEPT" ] && \
552 log "skipping tests: `echo $EXCEPT $ALWAYS_EXCEPT`"
553 for E in $EXCEPT $ALWAYS_EXCEPT; do
554 eval EXCEPT_${E}=true
563 IFS=abcdefghijklmnopqrstuvwxyz _basetest $1
567 export base=`basetest $1`
568 if [ ! -z "$ONLY" ]; then
570 if [ ${!testname}x != x ]; then
575 if [ ${!testname}x != x ]; then
583 if [ ${!testname}x != x ]; then
584 echo "skipping excluded test $1"
587 testname=EXCEPT_$base
588 if [ ${!testname}x != x ]; then
589 echo "skipping excluded test $1 (base $base)"
597 EQUALS="======================================================================"
601 local suffixlen=$((${#EQUALS} - ${#msg}))
602 [ $suffixlen -lt 5 ] && suffixlen=5
603 printf '===== %s %.*s\n' "$msg" $suffixlen $EQUALS
608 $LCTL mark "$*" 2> /dev/null || true
616 FFREE=`cat /proc/fs/lustre/mds/*/filesfree`
617 FTOTAL=`cat /proc/fs/lustre/mds/*/filestotal`
618 [ $FFREE -ge $FTOTAL ] && error "files free $FFREE > total $FTOTAL" || true
627 # Pretty tests run faster.
628 equals_msg $testnum: $message
631 log "== test $testnum: $message ============ `date +%H:%M:%S` ($BEFORE)"
633 test_${testnum} || error "test_$testnum failed with $?"
635 pass "($((`date +%s` - $BEFORE))s)"
639 (cd `dirname $1`; echo $PWD/`basename $1`)