2 # vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
4 # Run select tests by setting ONLY, or as arguments to the script.
5 # Skip specific tests by setting EXCEPT.
7 # e.g. ONLY="22 23" or ONLY="`seq 32 39`" or EXCEPT="31"
11 # bug number for skipped test:
12 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"$SANITY_GSS_EXCEPT"}
13 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
15 # Tests that fail on uml
16 CPU=`awk '/model/ {print $4}' /proc/cpuinfo`
17 [ "$CPU" = "UML" ] && EXCEPT="$EXCEPT"
20 2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT " ;;
21 *) error "unsupported kernel (gss only works with 2.6.x)" ;;
25 export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$SRCDIR/../utils/gss:$PATH:/sbin
26 export NAME=${NAME:-local}
29 CLEANUP=${CLEANUP:-""}
32 LUSTRE=${LUSTRE:-`dirname $0`/..}
33 . $LUSTRE/tests/test-framework.sh
35 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
37 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
39 [ "$SLOW" = "no" ] && EXCEPT_SLOW="100 101"
41 # $RUNAS_ID may get set incorrectly somewhere else
42 [ $UID -eq 0 -a $RUNAS_ID -eq 0 ] && error "\$RUNAS_ID set to 0, but \$UID is also 0!"
44 # remove $SEC, we'd like to control everything by ourselves
48 # global variables of this sanity
51 KRB5_CRED=$KRB5_CCACHE_DIR/krb5cc_$RUNAS_ID
52 KRB5_CRED_SAVE=$KRB5_CCACHE_DIR/krb5cc.sanity.save
59 prepare_krb5_creds() {
60 echo prepare krb5 cred
63 $RUNAS krb5_login.sh || exit 1
64 [ -f $KRB5_CRED ] || exit 2
66 cp $KRB5_CRED $KRB5_CRED_SAVE
71 # we want double mount
72 MOUNT_2=${MOUNT_2:-"yes"}
73 check_and_setup_lustre
75 rm -rf $DIR/[df][0-9]*
77 check_runas_id $RUNAS_ID $RUNAS_ID $RUNAS
83 NPROC=`cat /proc/cpuinfo 2>/dev/null | grep ^processor | wc -l`
84 [ $NPROC -gt 2 ] && NPROC=2
85 sh rundbench $NPROC 1>/dev/null &
89 num=`ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l`
90 if [ $num -ne 1 ]; then
91 error "failed to start dbench $NPROC"
93 echo "started dbench with $NPROC processes at background"
101 num=`ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l`
102 if [ $num -eq 0 ]; then
103 echo "dbench $DBENCH_PID already finished"
104 wait $DBENCH_PID || error "dbench $PID exit with error"
106 elif [ $num -ne 1 ]; then
108 error "found $num instance of pid $DBENCH_PID ???"
117 killall dbench 2>/dev/null
118 num=`ps --no-headers -p $DBENCH_PID | wc -l`
119 if [ $num -eq 0 ]; then
120 echo "dbench finished"
123 echo "dbench $DBENCH_PID is still running, waiting 2s..."
127 wait $DBENCH_PID || true
131 restore_krb5_cred() {
132 cp $KRB5_CRED_SAVE $KRB5_CRED
133 chown $RUNAS_ID:$RUNAS_ID $KRB5_CRED
134 chmod 0600 $KRB5_CRED
137 check_multiple_gss_daemons() {
140 local gssd_name=`basename $gssd`
142 for ((i=0;i<10;i++)); do
143 do_facet $facet "$gssd -v &"
146 # wait for the daemons to enter "stable" status
149 num=`do_facet $facet ps -o cmd -C $gssd_name | grep $gssd_name | wc -l`
150 echo "$num instance(s) of $gssd_name are running"
152 if [ $num -ne 1 ]; then
153 error "$gssd_name not unique"
163 echo "bring up gss daemons..."
166 echo "check with someone already running..."
167 check_multiple_gss_daemons $my_facet $LSVCGSSD
169 check_multiple_gss_daemons $my_facet $LGSSD
172 echo "check with someone run & finished..."
173 do_facet $my_facet killall -q -2 lgssd lsvcgssd || true
174 sleep 5 # wait fully exit
175 check_multiple_gss_daemons $my_facet $LSVCGSSD
177 check_multiple_gss_daemons $my_facet $LGSSD
180 echo "check refresh..."
181 do_facet $my_facet killall -q -2 lgssd lsvcgssd || true
182 sleep 5 # wait fully exit
183 do_facet $my_facet ipcrm -S 0x3b92d473
184 check_multiple_gss_daemons $my_facet $LSVCGSSD
186 do_facet $my_facet ipcrm -S 0x3a92d473
187 check_multiple_gss_daemons $my_facet $LGSSD
190 run_test 0 "start multiple gss daemons"
195 local file=$DIR/$tfile
197 chmod 0777 $DIR || error "chmod $DIR failed"
201 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
202 $RUNAS touch $file && error "unexpected success"
206 $RUNAS touch $file || error "should not fail"
207 [ -f $file ] || error "$file not found"
209 run_test 1 "access with or without krb5 credential"
212 local file1=$DIR/$tfile-1
213 local file2=$DIR/$tfile-2
215 chmod 0777 $DIR || error "chmod $DIR failed"
216 # current access should be ok
217 $RUNAS touch $file1 || error "can't touch $file1"
218 [ -f $file1 ] || error "$file1 not found"
220 # cleanup all cred/ctx and touch
222 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
223 $RUNAS touch $file2 && error "unexpected success"
227 $RUNAS touch $file2 || error "should not fail"
228 [ -f $file2 ] || error "$file2 not found"
230 run_test 2 "lfs flushctx"
233 local file=$DIR/$tfile
236 echo "aaaaaaaaaaaaaaaaa" > $file
238 $CHECKSTAT -p 0666 $file || error "$UID checkstat error"
239 $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat error"
240 $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat error"
243 $RUNAS multiop $file o_r &
245 # wait for multiop to finish its open()
248 # cleanup all cred/ctx and check
249 # metadata check should fail, but file data check should succeed
250 # because we always use root credential to OSTs
252 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
253 echo "destroied credentials/contexs for $RUNAS_ID"
254 $RUNAS $CHECKSTAT -p 0666 $file && error "checkstat succeed"
256 wait $OPPID || error "read file data failed"
257 echo "read file data OK"
259 # restore and check again
261 echo "restored credentials for $RUNAS_ID"
262 $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat (2) error"
263 echo "$RUNAS_ID checkstat OK"
264 $CHECKSTAT -p 0666 $file || error "$UID checkstat (2) error"
265 echo "$UID checkstat OK"
266 $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat (2) error"
267 echo "$RUNAS_ID read file data OK"
269 run_test 3 "local cache under DLM lock"
272 local file1=$DIR/$tfile-1
273 local file2=$DIR/$tfile-2
275 ! $GSS_PIPEFS && skip "pipefs not used" && return
277 chmod 0777 $DIR || error "chmod $DIR failed"
278 # current access should be ok
279 $RUNAS touch $file1 || error "can't touch $file1"
280 [ -f $file1 ] || error "$file1 not found"
283 send_sigint client lgssd
285 check_gss_daemon_facet client lgssd && error "lgssd still running"
287 # flush context, and touch
288 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
289 $RUNAS touch $file2 &
291 echo "waiting touch pid $TOUCHPID"
292 wait $TOUCHPID && error "touch should fail"
295 do_facet client "$LGSSD -v"
297 check_gss_daemon_facet client lgssd
299 # touch new should succeed
300 $RUNAS touch $file2 || error "can't touch $file2"
301 [ -f $file2 ] || error "$file2 not found"
303 run_test 4 "lgssd dead, operations should wait timeout and fail"
306 local file1=$DIR/$tfile-1
307 local file2=$DIR/$tfile-2
308 local wait_time=$((TIMEOUT + TIMEOUT / 2))
310 chmod 0777 $DIR || error "chmod $DIR failed"
311 # current access should be ok
312 $RUNAS touch $file1 || error "can't touch $file1"
313 [ -f $file1 ] || error "$file1 not found"
316 send_sigint $(comma_list $(mdts_nodes)) lsvcgssd
318 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) lsvcgssd && error "lsvcgssd still running"
320 # flush context, and touch
321 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
322 $RUNAS touch $file2 &
326 echo "waiting $wait_time seconds for touch pid $TOUCHPID"
328 num=`ps --no-headers -p $TOUCHPID | wc -l`
329 [ $num -eq 1 ] || error "touch already ended ($num)"
330 echo "process $TOUCHPID still hanging there... OK"
332 # restart lsvcgssd, expect touch to succeed
333 echo "restart lsvcgssd and recovering"
334 start_gss_daemons $(comma_list $(mdts_nodes)) "$LSVCGSSD -v"
336 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) lsvcgssd
337 wait $TOUCHPID || error "touch fail"
338 [ -f $file2 ] || error "$file2 not found"
340 run_test 5 "lsvcgssd dead, operations lead to recovery"
345 mkdir $DIR/d6 || error "mkdir $DIR/d6 failed"
346 for ((i=0; i<$nfile; i++)); do
347 dd if=/dev/zero of=$DIR/d6/file$i bs=8k count=1 || error "dd file$i failed"
349 ls -l $DIR/d6/* > /dev/null || error "ls failed"
350 rm -rf $DIR2/d6/* || error "rm failed"
351 rmdir $DIR2/d6/ || error "rmdir failed"
353 run_test 6 "test basic DLM callback works"
360 # for open(), the client only reserves space for a default stripe count
361 # lovea, but the server may return a larger lovea in the reply (because of
362 # a larger stripe count); the client then needs to call enlarge_reqbuf()
363 # and save the replied lovea in the request for possible future replay.
365 # Note: the current script does NOT guarantee enlarge_reqbuf() will be in
366 # the path; however, it does work in a local test which has 2 OSTs and
367 # a default stripe count of 1.
369 num_osts=`$LFS getstripe $MOUNT | egrep "^[0-9]*:.*ACTIVE" | wc -l`
370 echo "found $num_osts active OSTs"
371 [ $num_osts -lt 2 ] && echo "skipping $TESTNAME (must have >= 2 OSTs)" && return
374 $LFS setstripe -c $num_osts $tdir || error
377 for ((i=0;i<20;i++)); do
378 dd if=/dev/zero of=$tdir/f$i bs=4k count=16 2>/dev/null
381 for ((i=0;i<20;i++)); do
382 dd if=$tdir/f$i of=/dev/null bs=4k count=16 2>/dev/null
386 run_test 7 "exercise enlarge_reqbuf()"
390 local ATHISTORY=$(do_facet mds "find /sys/ -name at_history")
391 local ATOLDBASE=$(do_facet mds "cat $ATHISTORY")
392 do_facet mds "echo 8 >> $ATHISTORY"
396 sysctl -w lnet.debug="+other"
401 REQ_DELAY=`lctl get_param -n mdc.${FSNAME}-MDT0000-mdc-*.timeouts |
402 awk '/portal 12/ {print $5}' | tail -1`
403 REQ_DELAY=$((${REQ_DELAY} + ${REQ_DELAY} / 4 + 5))
405 # sleep for some time during ctx handling
406 do_facet mds lctl set_param fail_val=$REQ_DELAY
407 #define OBD_FAIL_SEC_CTX_HDL_PAUSE 0x1204
408 do_facet mds lctl set_param fail_loc=0x1204
410 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
412 $RUNAS touch $DIR/d8/f &
414 echo "waiting for touch (pid $TOUCHPID) to finish..."
415 sleep 2 # give it a chance to really trigger context init rpc
416 do_facet mds sysctl -w lustre.fail_loc=0
417 wait $TOUCHPID || error "touch should have succeeded"
419 $LCTL dk | grep "Early reply #" || error "No early reply"
422 do_facet mds "echo $ATOLDBASE >> $ATHISTORY" || true
424 run_test 8 "Early reply sent for slow gss context negotiation"
427 # following tests will manipulate flavors and may end with any flavor set,
428 # so each test should not assume any start flavor.
432 if [ "$SLOW" = "no" ]; then
438 restore_to_default_flavor
439 set_rule $FSNAME any any krb5p
440 wait_flavor all2all krb5p
444 for ((n=0;n<$total;n++)); do
447 echo "flush ctx ($n/$total) ..."
448 $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
451 #sleep to let ctxs be re-established
455 run_test 90 "recoverable from losing contexts under load"
465 nrule_old=`do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME 2>/dev/null \
466 | grep "$FSNAME.srpc.flavor." | wc -l`
467 echo "original general rules: $nrule_old"
469 for ((i = $nrule_old; i < $max; i++)); do
470 set_rule $FSNAME elan$i any krb5n || error "set rule $i"
472 for ((i = $nrule_old; i < $max; i++)); do
473 set_rule $FSNAME elan$i any || error "remove rule $i"
476 nrule_new=`do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME 2>/dev/null \
477 | grep "$FSNAME.srpc.flavor." | wc -l`
478 if [ $nrule_new != $nrule_old ]; then
479 error "general rule: $nrule_new != $nrule_old"
483 # target-specific rules
485 nrule_old=`do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME 2>/dev/null \
486 | grep "$FSNAME-MDT0000.srpc.flavor." | wc -l`
487 echo "original target rules: $nrule_old"
489 for ((i = $nrule_old; i < $max; i++)); do
490 set_rule $FSNAME-MDT0000 elan$i any krb5i || error "set rule $i"
492 for ((i = $nrule_old; i < $max; i++)); do
493 set_rule $FSNAME-MDT0000 elan$i any || error "remove rule $i"
496 nrule_new=`do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME 2>/dev/null \
497 | grep "$FSNAME-MDT0000.srpc.flavor." | wc -l`
498 if [ $nrule_new != $nrule_old ]; then
499 error "general rule: $nrule_new != $nrule_old"
502 run_test 99 "set large number of sptlrpc rules"
515 # started from default flavors
516 restore_to_default_flavor
518 # running dbench background
522 # all: null -> krb5n -> krb5a -> krb5i -> krb5p -> plain
524 set_rule $FSNAME any any krb5n
525 wait_flavor all2all krb5n || error_dbench "1"
528 set_rule $FSNAME any any krb5a
529 wait_flavor all2all krb5a || error_dbench "2"
532 set_rule $FSNAME any any krb5i
533 wait_flavor all2all krb5i || error_dbench "3"
536 set_rule $FSNAME any any krb5p
537 wait_flavor all2all krb5p || error_dbench "4"
540 set_rule $FSNAME any any plain
541 wait_flavor all2all plain || error_dbench "5"
550 set_rule $FSNAME any mdt2mdt krb5a
551 wait_flavor mdt2mdt krb5a || error_dbench "6"
554 set_rule $FSNAME any cli2mdt krb5i
555 wait_flavor cli2mdt krb5i || error_dbench "7"
558 set_rule $FSNAME any mdt2ost krb5p
559 wait_flavor mdt2ost krb5p || error_dbench "8"
562 set_rule $FSNAME any cli2ost krb5n
563 wait_flavor cli2ost krb5n || error_dbench "9"
570 # nothing should change because they are overridden by the above dir rules
572 set_rule $FSNAME-MDT0000 any any krb5p
573 set_rule $FSNAME-OST0000 any any krb5i
574 wait_flavor mdt2mdt krb5a || error_dbench "10"
575 wait_flavor cli2mdt krb5i || error_dbench "11"
577 wait_flavor mdt2ost krb5p || error_dbench "12"
578 wait_flavor cli2ost krb5n || error_dbench "13"
581 # delete all dir-specific rules
583 set_rule $FSNAME any mdt2mdt
584 set_rule $FSNAME any cli2mdt
585 set_rule $FSNAME any mdt2ost
586 set_rule $FSNAME any cli2ost
587 wait_flavor mdt2mdt krb5p $((MDSCOUNT - 1)) || error_dbench "14"
588 wait_flavor cli2mdt krb5p $(get_clients_mount_count) || error_dbench "15"
590 wait_flavor mdt2ost krb5i $MDSCOUNT || error_dbench "16"
591 wait_flavor cli2ost krb5i $(get_clients_mount_count) || error_dbench "17"
599 set_rule $FSNAME-MDT0000 any any
600 set_rule $FSNAME-OST0000 any any || error_dbench "18"
601 wait_flavor all2all plain || error_dbench "19"
606 run_test 100 "change security flavor on the fly under load"
612 local filename=$DIR/$tfile
617 # after setting flavor0, start multiop which uses a flavor0 rpc, and let
618 # the server drop the reply; then switch to flavor1, and the resend should
619 # be completed using flavor1. This exercises the code that switches ctx/sec
620 # for a resent request.
622 log ">>>>>>>>>>>>>>> Testing $flavor0 -> $flavor1 <<<<<<<<<<<<<<<<<<<"
624 set_rule $FSNAME any cli2mdt $flavor0
625 wait_flavor cli2mdt $flavor0
626 rm -f $filename || error "remove old $filename failed"
629 #define OBD_FAIL_PTLRPC_DROP_REQ_OPC 0x513
630 do_facet $SINGLEMDS lctl set_param fail_val=36
631 do_facet $SINGLEMDS lctl set_param fail_loc=0x513
632 log "starting multiop"
633 multiop $filename m &
635 echo "multiop pid=$multiop_pid"
638 set_rule $FSNAME any cli2mdt $flavor1
639 wait_flavor cli2mdt $flavor1
641 num=`ps --no-headers -p $multiop_pid 2>/dev/null | wc -l`
642 [ $num -eq 1 ] || error "multiop($multiop_pid) already ended ($num)"
643 echo "process $multiop_pid is still hanging there... OK"
645 do_facet $SINGLEMDS lctl set_param fail_loc=0
646 log "waiting for multiop ($multiop_pid) to finish"
647 wait $multiop_pid || error "multiop returned error"
652 # started from default flavors
653 restore_to_default_flavor
655 switch_sec_test null plain
656 switch_sec_test plain krb5n
657 switch_sec_test krb5n krb5a
658 switch_sec_test krb5a krb5i
659 switch_sec_test krb5i krb5p
660 switch_sec_test krb5p null
661 switch_sec_test null krb5p
662 switch_sec_test krb5p krb5i
663 switch_sec_test krb5i plain
664 switch_sec_test plain krb5p
666 run_test 101 "switch ctx/sec for resending request"
679 # started from default flavors
680 restore_to_default_flavor
682 # run dbench background
685 echo "Testing null->krb5n->krb5a->krb5i->krb5p->plain->null"
686 set_rule $FSNAME any any krb5n
687 set_rule $FSNAME any any krb5a
688 set_rule $FSNAME any any krb5i
689 set_rule $FSNAME any any krb5p
690 set_rule $FSNAME any any plain
691 set_rule $FSNAME any any null
694 wait_flavor all2all null || error_dbench "1"
697 echo "waiting for 15s and check again"
701 echo "Testing null->krb5i->null->krb5i->null..."
702 for ((i=0; i<10; i++)); do
703 set_rule $FSNAME any any krb5i
704 set_rule $FSNAME any any null
706 set_rule $FSNAME any any krb5i
709 wait_flavor all2all krb5i || error_dbench "2"
712 echo "waiting for 15s and check again"
718 run_test 102 "survive from insanely fast flavor switch"
723 local clients=$CLIENTS
725 [ -z $clients ] && clients=$HOSTNAME
727 # started from default flavors
728 restore_to_default_flavor
730 # at this time no rules have been set on mgs; mgc uses the null
731 # flavor to connect to mgs.
732 count=`flvr_cnt_mgc2mgs null`
733 [ $count -eq 1 ] || error "$count mgc connection use null flavor"
735 zconf_umount_clients $clients $MOUNT || return 1
737 # mount client with conflicting flavor - should fail
739 MOUNTOPT="$MOUNTOPT,mgssec=krb5p"
740 zconf_mount_clients $clients $MOUNT && \
741 error "mount with conflict flavor should have failed"
744 # mount client with same flavor - should succeed
746 MOUNTOPT="$MOUNTOPT,mgssec=null"
747 zconf_mount_clients $clients $MOUNT || \
748 error "mount with same flavor should have succeeded"
750 zconf_umount_clients $clients $MOUNT || return 2
752 # mount client with default flavor - should succeed
753 zconf_mount_clients $clients $MOUNT || \
754 error "mount with default flavor should have succeeded"
756 run_test 150 "secure mgs connection: client flavor setting"
761 # set mgs to only accept krb5p
762 set_rule _mgs any any krb5p
764 # umount everything, modules still loaded
767 # mount mgs with default flavor; in the current framework it means mgs+mdt1.
768 # the connection of the mgc of mdt1 to mgs is expected to fail.
769 DEVNAME=$(mdsdevname 1)
770 start mds1 $DEVNAME $MDS_MOUNT_OPTS && error "mount with default flavor should have failed"
772 # mount with unauthorized flavor should fail
773 save_opts=$MDS_MOUNT_OPTS
774 MDS_MOUNT_OPTS="$MDS_MOUNT_OPTS,mgssec=null"
775 start mds1 $DEVNAME $MDS_MOUNT_OPTS && error "mount with unauthorized flavor should have failed"
776 MDS_MOUNT_OPTS=$save_opts
778 # mount with designated flavor should succeed
779 save_opts=$MDS_MOUNT_OPTS
780 MDS_MOUNT_OPTS="$MDS_MOUNT_OPTS,mgssec=krb5p"
781 start mds1 $DEVNAME $MDS_MOUNT_OPTS || error "mount with designated flavor should have succeeded"
782 MDS_MOUNT_OPTS=$save_opts
786 run_test 151 "secure mgs connection: server flavor control"
788 equals_msg `basename $0`: test complete, cleaning up
789 check_and_cleanup_lustre
790 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true