2 # vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
4 # Run select tests by setting ONLY, or as arguments to the script.
5 # Skip specific tests by setting EXCEPT.
7 # e.g. ONLY="22 23" or ONLY="`seq 32 39`" or EXCEPT="31"
# Default skip list: ALWAYS_EXCEPT falls back to $SANITY_GSS_EXCEPT unless the caller set it.
11 # bug number for skipped test:
12 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"$SANITY_GSS_EXCEPT"}
13 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
15 # Tests that fail on uml
16 CPU=`awk '/model/ {print $4}' /proc/cpuinfo`
# NOTE(review): the assignment below sets EXCEPT to its own value, so nothing is added for UML.
17 [ "$CPU" = "UML" ] && EXCEPT="$EXCEPT"
# Kernel-version arms; the enclosing case statement is not visible in this excerpt.
20 2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT " ;;
21 *) error "unsupported kernel (gss only works with 2.6.x)" ;;
# Put the source tree, its utils and utils/gss directories ahead of the existing PATH.
25 export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$SRCDIR/../utils/gss:$PATH:/sbin
26 export NAME=${NAME:-local}
29 CLEANUP=${CLEANUP:-""}
# Source the shared test framework, then the configuration file (CONFIG defaults to cfg/$NAME.sh).
32 LUSTRE=${LUSTRE:-`dirname $0`/..}
33 . $LUSTRE/tests/test-framework.sh
35 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Leave with status 0 (not a failure) when require_dsh_mds does not succeed.
38 require_dsh_mds || exit 0
# With SLOW=no, tests 100 and 101 are listed in EXCEPT_SLOW.
40 [ "$SLOW" = "no" ] && EXCEPT_SLOW="100 101"
42 # $RUNAS_ID may get set incorrectly somewhere else
43 [ $UID -eq 0 -a $RUNAS_ID -eq 0 ] && error "\$RUNAS_ID set to 0, but \$UID is also 0!"
45 # remove $SEC, we'd like to control everything by ourselves
49 # global variables of this sanity
# KRB5_CRED is the credential cache of user $RUNAS_ID; KRB5_CRED_SAVE holds a saved copy of it.
52 KRB5_CRED=$KRB5_CCACHE_DIR/krb5cc_$RUNAS_ID
53 KRB5_CRED_SAVE=$KRB5_CCACHE_DIR/krb5cc.sanity.save
# prepare_krb5_creds: run krb5_login.sh through $RUNAS and save a copy of the
# resulting credential cache ($KRB5_CRED) as $KRB5_CRED_SAVE.
# Exits the whole script with status 1 if the login fails, or 2 if the
# credential file does not exist afterwards.
60 prepare_krb5_creds() {
61 echo prepare krb5 cred
64 $RUNAS krb5_login.sh || exit 1
65 [ -f $KRB5_CRED ] || exit 2
67 cp $KRB5_CRED $KRB5_CRED_SAVE
72 # we want double mount
# MOUNT_2 defaults to "yes" before the framework sets up the filesystem.
73 MOUNT_2=${MOUNT_2:-"yes"}
74 check_and_setup_lustre
# Remove entries under $DIR whose names start with "d" or "f" followed by a digit.
76 rm -rf $DIR/[df][0-9]*
78 check_runas_id $RUNAS_ID $RUNAS_ID $RUNAS
# Fragment that launches dbench in the background (its function header is not
# visible in this excerpt).
# NPROC is the number of "processor" lines in /proc/cpuinfo, capped at 2.
84 NPROC=`cat /proc/cpuinfo 2>/dev/null | grep ^processor | wc -l`
85 [ $NPROC -gt 2 ] && NPROC=2
86 sh rundbench $NPROC 1>/dev/null &
# NOTE(review): $DBENCH_PID is assigned on a line not shown here; presumably from $! — confirm.
# Exactly one process with that pid must exist, otherwise the start is treated as failed.
90 num=`ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l`
91 if [ $num -ne 1 ]; then
92 error "failed to start dbench $NPROC"
94 echo "started dbench with $NPROC processes at background"
# Fragment that checks on the background dbench job (function header not visible
# in this excerpt). Counts processes with pid $DBENCH_PID:
#   0 -> dbench already ended; reap it and fail if its exit status is non-zero
#   1 -> still running (handled on lines not shown here)
#   anything else -> unexpected, reported as an error
102 num=`ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l`
103 if [ $num -eq 0 ]; then
104 echo "dbench $DBENCH_PID already finished"
# Report the pid that was actually waited on; the message previously expanded $PID,
# which is not set anywhere in this script.
105 wait $DBENCH_PID || error "dbench $DBENCH_PID exit with error"
107 elif [ $num -ne 1 ]; then
109 error "found $num instance of pid $DBENCH_PID ???"
# Fragment that stops the background dbench job (function header and loop
# structure are not visible in this excerpt).
# killall errors are discarded, e.g. when no dbench process is left.
118 killall dbench 2>/dev/null
119 num=`ps --no-headers -p $DBENCH_PID | wc -l`
120 if [ $num -eq 0 ]; then
121 echo "dbench finished"
124 echo "dbench $DBENCH_PID is still running, waiting 2s..."
# Reap the job; its exit status is deliberately ignored here.
128 wait $DBENCH_PID || true
# restore_krb5_cred: copy the saved credential cache ($KRB5_CRED_SAVE) back to
# $KRB5_CRED, then make it owned by $RUNAS_ID:$RUNAS_ID with mode 0600.
132 restore_krb5_cred() {
133 cp $KRB5_CRED_SAVE $KRB5_CRED
134 chown $RUNAS_ID:$RUNAS_ID $KRB5_CRED
135 chmod 0600 $KRB5_CRED
# check_multiple_gss_daemons: start the daemon $gssd ten times on $facet, then
# fail unless exactly one instance of it is running there.
# Callers in this file invoke it as: check_multiple_gss_daemons <facet> <daemon path>.
# NOTE(review): the assignments of $facet and $gssd are not visible in this
# excerpt; presumably taken from $1 and $2 — confirm.
138 check_multiple_gss_daemons() {
141 local gssd_name=`basename $gssd`
143 for ((i=0;i<10;i++)); do
144 do_facet $facet "$gssd -v &"
147 # wait for the daemons to enter a "stable" status
# Count the ps output lines on the facet that mention the daemon's base name.
150 num=`do_facet $facet ps -o cmd -C $gssd_name | grep $gssd_name | wc -l`
151 echo "$num instance(s) of $gssd_name are running"
153 if [ $num -ne 1 ]; then
154 error "$gssd_name not unique"
# Body of test 0: runs check_multiple_gss_daemons for $LSVCGSSD and $LGSSD on
# $my_facet in three situations, named by the echo lines below.
# NOTE(review): the assignment of $my_facet is not visible in this excerpt.
164 echo "bring up gss daemons..."
167 echo "check with someone already running..."
168 check_multiple_gss_daemons $my_facet $LSVCGSSD
170 check_multiple_gss_daemons $my_facet $LGSSD
173 echo "check with someone run & finished..."
# Send signal 2 (SIGINT) to both daemons; "|| true" ignores a non-zero status.
174 do_facet $my_facet killall -q -2 lgssd lsvcgssd || true
175 sleep 5 # wait fully exit
176 check_multiple_gss_daemons $my_facet $LSVCGSSD
178 check_multiple_gss_daemons $my_facet $LGSSD
181 echo "check refresh..."
182 do_facet $my_facet killall -q -2 lgssd lsvcgssd || true
183 sleep 5 # wait fully exit
# ipcrm -S removes the semaphore set with the given key before each daemon is restarted.
# NOTE(review): presumably these keys belong to lsvcgssd and lgssd respectively — confirm.
184 do_facet $my_facet ipcrm -S 0x3b92d473
185 check_multiple_gss_daemons $my_facet $LSVCGSSD
187 do_facet $my_facet ipcrm -S 0x3a92d473
188 check_multiple_gss_daemons $my_facet $LGSSD
191 run_test 0 "start multiple gss daemons"
# Body of test 1: after the RUNAS user's contexts are flushed, touch is expected
# to fail; later it is expected to succeed again.
# NOTE(review): lines between the two touch calls are missing from this excerpt;
# presumably the credential is removed before and restored after the first touch — confirm.
196 local file=$DIR/$tfile
198 chmod 0777 $DIR || error "chmod $DIR failed"
202 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
# Success here is the failure case.
203 $RUNAS touch $file && error "unexpected success"
207 $RUNAS touch $file || error "should not fail"
208 [ -f $file ] || error "$file not found"
210 run_test 1 "access with or without krb5 credential"
# Body of test 2: verifies that access works, then flushes the RUNAS user's
# contexts with "lfs flushctx" and expects a touch to fail, then to succeed again.
# NOTE(review): the lines that remove and restore the credential are not visible here.
213 local file1=$DIR/$tfile-1
214 local file2=$DIR/$tfile-2
216 chmod 0777 $DIR || error "chmod $DIR failed"
217 # current access should be ok
218 $RUNAS touch $file1 || error "can't touch $file1"
219 [ -f $file1 ] || error "$file1 not found"
221 # cleanup all cred/ctx and touch
223 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
224 $RUNAS touch $file2 && error "unexpected success"
228 $RUNAS touch $file2 || error "should not fail"
229 [ -f $file2 ] || error "$file2 not found"
231 run_test 2 "lfs flushctx"
# Body of test 3: checks metadata and data access for both the current user and
# the RUNAS user, before and after the RUNAS user's contexts are flushed.
234 local file=$DIR/$tfile
237 echo "aaaaaaaaaaaaaaaaa" > $file
239 $CHECKSTAT -p 0666 $file || error "$UID checkstat error"
240 $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat error"
241 $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat error"
# Background multiop on the file; it is reaped via "wait $OPPID" further down.
# NOTE(review): the assignment of $OPPID is not visible here; presumably from $! — confirm.
244 $RUNAS multiop $file o_r &
246 # wait for multiop to finish its open()
249 # cleanup all cred/ctx and check
250 # metadata check should fail, but file data check should succeed
251 # because we always use root credential to OSTs
253 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
254 echo "destroied credentials/contexs for $RUNAS_ID"
255 $RUNAS $CHECKSTAT -p 0666 $file && error "checkstat succeed"
257 wait $OPPID || error "read file data failed"
258 echo "read file data OK"
260 # restore and check again
262 echo "restored credentials for $RUNAS_ID"
263 $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat (2) error"
264 echo "$RUNAS_ID checkstat OK"
265 $CHECKSTAT -p 0666 $file || error "$UID checkstat (2) error"
266 echo "$UID checkstat OK"
267 $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat (2) error"
268 echo "$RUNAS_ID read file data OK"
270 run_test 3 "local cache under DLM lock"
# Body of test 4: stops lgssd on the client, expects a touch after flushctx to
# fail, then restarts lgssd and expects touch to work again.
273 local file1=$DIR/$tfile-1
274 local file2=$DIR/$tfile-2
# Skip the test when $GSS_PIPEFS evaluates as false.
276 ! $GSS_PIPEFS && skip "pipefs not used" && return
278 chmod 0777 $DIR || error "chmod $DIR failed"
279 # current access should be ok
280 $RUNAS touch $file1 || error "can't touch $file1"
281 [ -f $file1 ] || error "$file1 not found"
284 send_sigint client lgssd
# The daemon check succeeding here means lgssd did not stop.
286 check_gss_daemon_facet client lgssd && error "lgssd still running"
288 # flush context, and touch
289 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
# NOTE(review): the assignment of $TOUCHPID is not visible here; presumably from $! — confirm.
290 $RUNAS touch $file2 &
292 echo "waiting touch pid $TOUCHPID"
# A zero exit status from the background touch is the failure case.
293 wait $TOUCHPID && error "touch should fail"
296 do_facet client "$LGSSD -v"
298 check_gss_daemon_facet client lgssd
300 # touch new should succeed
301 $RUNAS touch $file2 || error "can't touch $file2"
302 [ -f $file2 ] || error "$file2 not found"
304 run_test 4 "lgssd dead, operations should wait timeout and fail"
# Body of test 5: stops lsvcgssd on the MDT nodes, starts a touch that is
# expected to hang, then restarts lsvcgssd and expects the touch to complete.
307 local file1=$DIR/$tfile-1
308 local file2=$DIR/$tfile-2
# 1.5 x $TIMEOUT (integer arithmetic).
309 local wait_time=$((TIMEOUT + TIMEOUT / 2))
311 chmod 0777 $DIR || error "chmod $DIR failed"
312 # current access should be ok
313 $RUNAS touch $file1 || error "can't touch $file1"
314 [ -f $file1 ] || error "$file1 not found"
317 send_sigint $(comma_list $(mdts_nodes)) lsvcgssd
319 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) lsvcgssd && error "lsvcgssd still running"
321 # flush context, and touch
322 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
# NOTE(review): the assignment of $TOUCHPID and the actual wait are on lines not shown here.
323 $RUNAS touch $file2 &
327 echo "waiting $wait_time seconds for touch pid $TOUCHPID"
# The touch process must still exist at this point, i.e. be blocked.
329 num=`ps --no-headers -p $TOUCHPID | wc -l`
330 [ $num -eq 1 ] || error "touch already ended ($num)"
331 echo "process $TOUCHPID still hanging there... OK"
333 # restart lsvcgssd, expect touch to succeed
334 echo "restart lsvcgssd and recovering"
335 start_gss_daemons $(comma_list $(mdts_nodes)) "$LSVCGSSD -v"
337 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) lsvcgssd
338 wait $TOUCHPID || error "touch fail"
339 [ -f $file2 ] || error "$file2 not found"
341 run_test 5 "lsvcgssd dead, operations lead to recovery"
# Body of test 6: creates $nfile 8 KiB files under $DIR/d6, lists them, then
# removes the files and the directory through $DIR2.
# NOTE(review): the assignment of $nfile is not visible in this excerpt.
346 mkdir $DIR/d6 || error "mkdir $DIR/d6 failed"
347 for ((i=0; i<$nfile; i++)); do
348 dd if=/dev/zero of=$DIR/d6/file$i bs=8k count=1 || error "dd file$i failed"
350 ls -l $DIR/d6/* > /dev/null || error "ls failed"
# Removal goes through $DIR2 rather than $DIR; presumably the second mount point — confirm.
351 rm -rf $DIR2/d6/* || error "rm failed"
352 rmdir $DIR2/d6/ || error "rmdir failed"
354 run_test 6 "test basic DLM callback works"
361 # for open(), the client only reserves space for a default-stripe-count lovea,
362 # and the server may return a larger lovea in the reply (because of a larger
363 # stripe count); the client needs to call enlarge_reqbuf() and save the replied
364 # lovea in the request for a possible future replay.
366 # Note: the current script does NOT guarantee enlarge_reqbuf() will be in
367 # the path, however it does work in a local test which has 2 OSTs and
368 # a default stripe count of 1.
# Count the "lfs getstripe" output lines that start with an index and contain ACTIVE.
370 num_osts=`$LFS getstripe $MOUNT | egrep "^[0-9]*:.*ACTIVE" | wc -l`
371 echo "found $num_osts active OSTs"
372 [ $num_osts -lt 2 ] && echo "skipping $TESTNAME (must have >= 2 OSTs)" && return
# Stripe $tdir across all counted OSTs; error is called without a message on failure.
375 $LFS setstripe -c $num_osts $tdir || error
# Write twenty 64 KiB files (16 x 4k), then read each of them back; dd stderr is discarded.
378 for ((i=0;i<20;i++)); do
379 dd if=/dev/zero of=$tdir/f$i bs=4k count=16 2>/dev/null
382 for ((i=0;i<20;i++)); do
383 dd if=$tdir/f$i of=/dev/null bs=4k count=16 2>/dev/null
387 run_test 7 "exercise enlarge_reqbuf()"
# Body of test 8: pauses context handling on $SINGLEMDS via a fail point,
# triggers a context (re)negotiation with touch, and expects an "Early reply #"
# entry in the client debug log.
# Locate the at_history file on the MDS and remember its current content for restoring.
391 local ATHISTORY=$(do_facet $SINGLEMDS "find /sys/ -name at_history")
392 local ATOLDBASE=$(do_facet $SINGLEMDS "cat $ATHISTORY")
394 do_facet $SINGLEMDS "echo 8 >> $ATHISTORY"
401 sysctl -w lnet.debug="+other"
403 # wait for the AT estimation to come down; this is faster
# Take field 5 of the last "portal 12" line of the mdc timeouts as the current estimate.
405 REQ_DELAY=`lctl get_param -n mdc.${FSNAME}-MDT0000-mdc-*.timeouts |
406 awk '/portal 12/ {print $5}' | tail -1`
407 [ $REQ_DELAY -le 5 ] && break
408 echo "current AT estimation is $REQ_DELAY, wait a little bit"
# Pause length: the estimate plus a quarter of it plus 5.
411 REQ_DELAY=$((${REQ_DELAY} + ${REQ_DELAY} / 4 + 5))
413 # sleep for some time in the ctx handler
414 do_facet $SINGLEMDS lctl set_param fail_val=$REQ_DELAY
415 #define OBD_FAIL_SEC_CTX_HDL_PAUSE 0x1204
416 do_facet $SINGLEMDS lctl set_param fail_loc=0x1204
418 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
# NOTE(review): the assignment of $TOUCHPID is not visible here; presumably from $! — confirm.
420 $RUNAS touch $DIR/d8/f &
422 echo "waiting for touch (pid $TOUCHPID) to finish..."
423 sleep 2 # give it a chance to really trigger context init rpc
# Disarm the fail point through the same lctl interface that armed it, so it is
# reliably cleared and cannot leak into later tests.
424 do_facet $SINGLEMDS lctl set_param fail_loc=0
425 wait $TOUCHPID || error "touch should have succeeded"
427 $LCTL dk | grep "Early reply #" || error "No early reply"
# Write the saved at_history value back; a failure here is ignored.
430 do_facet $SINGLEMDS "echo $ATOLDBASE >> $ATHISTORY" || true
432 run_test 8 "Early reply sent for slow gss context negotiation"
435 # The following tests manipulate flavors and may end with any flavor set,
436 # so no test should assume a particular starting flavor.
# Body of test 90: switches everything to krb5p, then flushes contexts $total
# times in a loop.
# NOTE(review): the SLOW=no branch body and the assignment of $total are not
# visible in this excerpt.
440 if [ "$SLOW" = "no" ]; then
446 restore_to_default_flavor
447 set_rule $FSNAME any any krb5p
448 wait_flavor all2all krb5p
452 for ((n=0;n<$total;n++)); do
455 echo "flush ctx ($n/$total) ..."
456 $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
459 # sleep to let ctxs be re-established
463 run_test 90 "recoverable from losing contexts under load"
# Body of test 99: adds sptlrpc rules until $max exist, removes them again, and
# checks that the rule count is back to its original value. This is done first
# for filesystem-wide rules, then for rules specific to target $FSNAME-MDT0000.
# NOTE(review): the assignment of $max is not visible in this excerpt.
# Count the existing general rules in the MGS live config.
473 nrule_old=`do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME 2>/dev/null \
474 | grep "$FSNAME.srpc.flavor." | wc -l`
475 echo "original general rules: $nrule_old"
477 for ((i = $nrule_old; i < $max; i++)); do
478 set_rule $FSNAME elan$i any krb5n || error "set rule $i"
# set_rule without a flavor argument is used here to remove the rule again.
480 for ((i = $nrule_old; i < $max; i++)); do
481 set_rule $FSNAME elan$i any || error "remove rule $i"
484 nrule_new=`do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME 2>/dev/null \
485 | grep "$FSNAME.srpc.flavor." | wc -l`
486 if [ $nrule_new != $nrule_old ]; then
487 error "general rule: $nrule_new != $nrule_old"
491 # target-specific rules
493 nrule_old=`do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME 2>/dev/null \
494 | grep "$FSNAME-MDT0000.srpc.flavor." | wc -l`
495 echo "original target rules: $nrule_old"
497 for ((i = $nrule_old; i < $max; i++)); do
498 set_rule $FSNAME-MDT0000 elan$i any krb5i || error "set rule $i"
500 for ((i = $nrule_old; i < $max; i++)); do
501 set_rule $FSNAME-MDT0000 elan$i any || error "remove rule $i"
504 nrule_new=`do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME 2>/dev/null \
505 | grep "$FSNAME-MDT0000.srpc.flavor." | wc -l`
506 if [ $nrule_new != $nrule_old ]; then
# This check covers the target-specific rules, so the message says so.
507 error "target rule: $nrule_new != $nrule_old"
510 run_test 99 "set large number of sptlrpc rules"
# Body of test 100: changes sptlrpc flavors step by step and waits for each
# change to take effect; every failed wait is reported through error_dbench with
# a step number.
# NOTE(review): the lines that start and stop dbench are not visible in this excerpt.
523 # start from default flavors
524 restore_to_default_flavor
526 # running dbench background
530 # all: null -> krb5n -> krb5a -> krb5i -> krb5p -> plain
532 set_rule $FSNAME any any krb5n
533 wait_flavor all2all krb5n || error_dbench "1"
536 set_rule $FSNAME any any krb5a
537 wait_flavor all2all krb5a || error_dbench "2"
540 set_rule $FSNAME any any krb5i
541 wait_flavor all2all krb5i || error_dbench "3"
544 set_rule $FSNAME any any krb5p
545 wait_flavor all2all krb5p || error_dbench "4"
548 set_rule $FSNAME any any plain
549 wait_flavor all2all plain || error_dbench "5"
# Direction-specific rules: a different flavor for each of the four directions.
558 set_rule $FSNAME any mdt2mdt krb5a
559 wait_flavor mdt2mdt krb5a || error_dbench "6"
562 set_rule $FSNAME any cli2mdt krb5i
563 wait_flavor cli2mdt krb5i || error_dbench "7"
566 set_rule $FSNAME any mdt2ost krb5p
567 wait_flavor mdt2ost krb5p || error_dbench "8"
570 set_rule $FSNAME any cli2ost krb5n
571 wait_flavor cli2ost krb5n || error_dbench "9"
578 # nothing should change because these are overridden by the dir rules above
580 set_rule $FSNAME-MDT0000 any any krb5p
581 set_rule $FSNAME-OST0000 any any krb5i
582 wait_flavor mdt2mdt krb5a || error_dbench "10"
583 wait_flavor cli2mdt krb5i || error_dbench "11"
585 wait_flavor mdt2ost krb5p || error_dbench "12"
586 wait_flavor cli2ost krb5n || error_dbench "13"
589 # delete all dir-specific rules
591 set_rule $FSNAME any mdt2mdt
592 set_rule $FSNAME any cli2mdt
593 set_rule $FSNAME any mdt2ost
594 set_rule $FSNAME any cli2ost
# The flavors set on the two targets above (krb5p on MDT0000, krb5i on OST0000)
# are now the ones waited for; the extra argument is an expected count.
595 wait_flavor mdt2mdt krb5p $((MDSCOUNT - 1)) || error_dbench "14"
596 wait_flavor cli2mdt krb5p $(get_clients_mount_count) || error_dbench "15"
598 wait_flavor mdt2ost krb5i $MDSCOUNT || error_dbench "16"
599 wait_flavor cli2ost krb5i $(get_clients_mount_count) || error_dbench "17"
# Remove the target-specific rules; everything is then expected to be plain.
607 set_rule $FSNAME-MDT0000 any any
608 set_rule $FSNAME-OST0000 any any || error_dbench "18"
609 wait_flavor all2all plain || error_dbench "19"
614 run_test 100 "change security flavor on the fly under load"
# Fragment of the helper that tests one flavor switch (header not visible in
# this excerpt; it is presumably switch_sec_test <flavor0> <flavor1>, as called
# elsewhere in this file — confirm).
# NOTE(review): the assignments of $flavor0, $flavor1 and $multiop_pid are not visible here.
620 local filename=$DIR/$tfile
625 # after setting flavor0, start multiop, which uses a flavor0 rpc, and let
626 # the server drop the reply; then switch to flavor1, and the resend should be
627 # completed using flavor1. This exercises the code that switches ctx/sec
628 # for a resent request.
630 log ">>>>>>>>>>>>>>> Testing $flavor0 -> $flavor1 <<<<<<<<<<<<<<<<<<<"
632 set_rule $FSNAME any cli2mdt $flavor0
633 wait_flavor cli2mdt $flavor0
634 rm -f $filename || error "remove old $filename failed"
637 #define OBD_FAIL_PTLRPC_DROP_REQ_OPC 0x513
# Arm the fail point on the MDS with fail_val=36.
638 do_facet $SINGLEMDS lctl set_param fail_val=36
639 do_facet $SINGLEMDS lctl set_param fail_loc=0x513
640 log "starting multiop"
641 multiop $filename m &
643 echo "multiop pid=$multiop_pid"
646 set_rule $FSNAME any cli2mdt $flavor1
647 wait_flavor cli2mdt $flavor1
# multiop must still be running (blocked) after the flavor switch.
649 num=`ps --no-headers -p $multiop_pid 2>/dev/null | wc -l`
650 [ $num -eq 1 ] || error "multiop($multiop_pid) already ended ($num)"
651 echo "process $multiop_pid is still hanging there... OK"
# Disarm the fail point, then expect multiop to finish successfully.
653 do_facet $SINGLEMDS lctl set_param fail_loc=0
654 log "waiting for multiop ($multiop_pid) to finish"
655 wait $multiop_pid || error "multiop returned error"
# Body of test 101: runs switch_sec_test over a fixed chain of flavor pairs,
# each starting from the flavor the previous one ended on.
660 # start from default flavors
661 restore_to_default_flavor
663 switch_sec_test null plain
664 switch_sec_test plain krb5n
665 switch_sec_test krb5n krb5a
666 switch_sec_test krb5a krb5i
667 switch_sec_test krb5i krb5p
668 switch_sec_test krb5p null
669 switch_sec_test null krb5p
670 switch_sec_test krb5p krb5i
671 switch_sec_test krb5i plain
672 switch_sec_test plain krb5p
674 run_test 101 "switch ctx/sec for resending request"
# Body of test 102: issues flavor changes back to back without waiting in
# between, then waits only for the final flavor to take effect.
# NOTE(review): the lines that start/stop dbench and the 15s re-checks are not
# visible in this excerpt.
687 # start from default flavors
688 restore_to_default_flavor
690 # run dbench background
693 echo "Testing null->krb5n->krb5a->krb5i->krb5p->plain->null"
694 set_rule $FSNAME any any krb5n
695 set_rule $FSNAME any any krb5a
696 set_rule $FSNAME any any krb5i
697 set_rule $FSNAME any any krb5p
698 set_rule $FSNAME any any plain
699 set_rule $FSNAME any any null
# Only the last rule set (null) is waited for.
702 wait_flavor all2all null || error_dbench "1"
705 echo "waiting for 15s and check again"
709 echo "Testing null->krb5i->null->krb5i->null..."
# Ten krb5i/null toggles, then a final switch to krb5i.
710 for ((i=0; i<10; i++)); do
711 set_rule $FSNAME any any krb5i
712 set_rule $FSNAME any any null
714 set_rule $FSNAME any any krb5i
717 wait_flavor all2all krb5i || error_dbench "2"
720 echo "waiting for 15s and check again"
726 run_test 102 "survive from insanely fast flavor switch"
# Body of test 150: remounts the clients with different mgssec= options and
# checks which mounts are accepted.
731 local clients=$CLIENTS
# Fall back to the local host when no client list is configured. The expansion
# is quoted so the test stays well-formed whether the value is empty or
# contains whitespace.
733 [ -z "$clients" ] && clients=$HOSTNAME
735 # start from default flavors
736 restore_to_default_flavor
738 # at this time no rules have been set on the mgs; the mgc uses the null
739 # flavor to connect to the mgs.
740 count=`flvr_cnt_mgc2mgs null`
741 [ $count -eq 1 ] || error "$count mgc connection use null flavor"
743 zconf_umount_clients $clients $MOUNT || return 1
745 # mount client with conflict flavor - should fail
# NOTE(review): MOUNTOPT is appended to here and again below; the lines that
# save/restore it are not visible in this excerpt.
747 MOUNTOPT="$MOUNTOPT,mgssec=krb5p"
748 zconf_mount_clients $clients $MOUNT && \
749 error "mount with conflict flavor should have failed"
752 # mount client with same flavor - should succeed
754 MOUNTOPT="$MOUNTOPT,mgssec=null"
755 zconf_mount_clients $clients $MOUNT || \
756 error "mount with same flavor should have succeeded"
758 zconf_umount_clients $clients $MOUNT || return 2
760 # mount client with default flavor - should succeed
761 zconf_mount_clients $clients $MOUNT || \
762 error "mount with default flavor should have succeeded"
764 run_test 150 "secure mgs connection: client flavor setting"
# Body of test 151: restricts the mgs to krb5p, then tries to start mds1 with
# the default, an unauthorized (null) and the designated (krb5p) mgssec flavor.
769 # set mgs to accept only krb5p
770 set_rule _mgs any any krb5p
772 # umount everything, modules still loaded
775 # mount mgs with default flavor; in the current framework this means mgs+mdt1.
776 # the connection from the mgc of mdt1 to the mgs is expected to fail.
777 DEVNAME=$(mdsdevname 1)
778 start mds1 $DEVNAME $MDS_MOUNT_OPTS && error "mount with default flavor should have failed"
780 # mount with unauthorized flavor should fail
# MDS_MOUNT_OPTS is changed only for this attempt and restored right after it.
781 save_opts=$MDS_MOUNT_OPTS
782 MDS_MOUNT_OPTS="$MDS_MOUNT_OPTS,mgssec=null"
783 start mds1 $DEVNAME $MDS_MOUNT_OPTS && error "mount with unauthorized flavor should have failed"
784 MDS_MOUNT_OPTS=$save_opts
786 # mount with designated flavor should succeed
787 save_opts=$MDS_MOUNT_OPTS
788 MDS_MOUNT_OPTS="$MDS_MOUNT_OPTS,mgssec=krb5p"
789 start mds1 $DEVNAME $MDS_MOUNT_OPTS || error "mount with designated flavor should have succeeded"
790 MDS_MOUNT_OPTS=$save_opts
794 run_test 151 "secure mgs connection: server flavor control"
# End of the suite: print a banner and let the framework clean up.
796 equals_msg `basename $0`: test complete, cleaning up
797 check_and_cleanup_lustre
# If the suite log exists, print it and exit 1 when it contains "FAIL"; the
# trailing "|| true" makes this line succeed in every other case.
798 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true