3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
6 # e.g. ONLY="22 23" or ONLY="`seq 32 39`" or EXCEPT="31"
10 # bug number for skipped test:
11 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"$SANITY_GSS_EXCEPT"}
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
17 export MULTIOP=${MULTIOP:-multiop}
19 LUSTRE=${LUSTRE:-$(dirname $0)/..}
20 . $LUSTRE/tests/test-framework.sh
22 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
25 require_dsh_mds || exit 0
27 [ "$SLOW" = "no" ] && EXCEPT_SLOW="100 101"
29 # $RUNAS_ID may get set incorrectly somewhere else
30 [ $UID -eq 0 -a $RUNAS_ID -eq 0 ] &&
31 error "RUNAS_ID set to 0, but UID is also 0!"
33 # remove $SEC, we'd like to control everything by ourselves
37 # global variables of this sanity
40 KRB5_CRED=$KRB5_CCACHE_DIR/krb5cc_$RUNAS_ID
41 KRB5_CRED_SAVE=$KRB5_CCACHE_DIR/krb5cc.sanity.save
48 prepare_krb5_creds() {
49 echo prepare krb5 cred
52 $RUNAS krb5_login.sh || exit 1
53 [ -f $KRB5_CRED ] || exit 2
55 cp $KRB5_CRED $KRB5_CRED_SAVE
60 # we want double mount
61 MOUNT_2=${MOUNT_2:-"yes"}
62 check_and_setup_lustre
64 rm -rf $DIR/[df][0-9]*
66 check_runas_id $RUNAS_ID $RUNAS_ID $RUNAS
72 local NPROC=$(grep -c ^processor /proc/cpuinfo)
73 [ $NPROC -gt 2 ] && NPROC=2
74 sh rundbench $NPROC 1>/dev/null &
78 num=$(ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l)
79 if [ $num -ne 1 ]; then
80 error "failed to start dbench $NPROC"
82 echo "started dbench with $NPROC processes at background"
90 num=$(ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l)
91 if [ $num -eq 0 ]; then
92 echo "dbench $DBENCH_PID already finished"
# Reap the finished dbench job and fail the test if it exited non-zero.
# The message reports DBENCH_PID, the pid actually waited on; no assignment
# to a plain PID variable is visible here, so "$PID" presumably expanded to
# nothing in the old message.
93 wait $DBENCH_PID || error "dbench $DBENCH_PID exit with error"
95 elif [ $num -ne 1 ]; then
97 error "found $num instance of pid $DBENCH_PID ???"
106 killall dbench 2>/dev/null
107 local num=$(ps --no-headers -p $DBENCH_PID | wc -l)
108 if [ $num -eq 0 ]; then
109 echo "dbench finished"
112 echo "dbench $DBENCH_PID is still running, waiting 2s..."
116 wait $DBENCH_PID || true
120 restore_krb5_cred() {
121 cp $KRB5_CRED_SAVE $KRB5_CRED
122 chown $RUNAS_ID:$RUNAS_ID $KRB5_CRED
123 chmod 0600 $KRB5_CRED
126 check_multiple_gss_daemons() {
129 local gssd_name=$(basename $gssd)
131 for ((i = 0; i < 10; i++)); do
132 do_facet $facet "$gssd -v &"
135 # wait daemons entering "stable" status
138 local num=$(do_facet $facet ps -o cmd -C $gssd_name |
140 echo "$num instance(s) of $gssd_name are running"
142 if [ $num -ne 1 ]; then
143 error "$gssd_name not unique"
153 echo "bring up gss daemons..."
156 echo "check with someone already running..."
157 check_multiple_gss_daemons $my_facet $LSVCGSSD
159 check_multiple_gss_daemons $my_facet $LGSSD
162 echo "check with someone run & finished..."
163 do_facet $my_facet killall -q -2 lgssd lsvcgssd || true
164 sleep 5 # wait fully exit
165 check_multiple_gss_daemons $my_facet $LSVCGSSD
167 check_multiple_gss_daemons $my_facet $LGSSD
170 echo "check refresh..."
171 do_facet $my_facet killall -q -2 lgssd lsvcgssd || true
172 sleep 5 # wait fully exit
173 do_facet $my_facet ipcrm -S 0x3b92d473
174 check_multiple_gss_daemons $my_facet $LSVCGSSD
176 do_facet $my_facet ipcrm -S 0x3a92d473
177 check_multiple_gss_daemons $my_facet $LGSSD
180 run_test 0 "start multiple gss daemons"
185 local file=$DIR/$tfile
187 chmod 0777 $DIR || error "chmod $DIR failed"
191 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
192 $RUNAS touch $file && error "unexpected success"
196 $RUNAS touch $file || error "should not fail"
197 [ -f $file ] || error "$file not found"
199 run_test 1 "access with or without krb5 credential"
202 local file1=$DIR/$tfile-1
203 local file2=$DIR/$tfile-2
205 chmod 0777 $DIR || error "chmod $DIR failed"
206 # current access should be ok
207 $RUNAS touch $file1 || error "can't touch $file1"
208 [ -f $file1 ] || error "$file1 not found"
210 # cleanup all cred/ctx and touch
212 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
213 $RUNAS touch $file2 && error "unexpected success"
217 $RUNAS touch $file2 || error "should not fail"
218 [ -f $file2 ] || error "$file2 not found"
220 run_test 2 "lfs flushctx"
223 local file=$DIR/$tfile
226 echo "aaaaaaaaaaaaaaaaa" > $file
228 $CHECKSTAT -p 0666 $file || error "$UID checkstat error"
229 $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat error"
230 $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat error"
233 $RUNAS $MULTIOP $file o_r &
235 # wait for multiop to finish its open()
238 # cleanup all cred/ctx and check
239 # metadata check should fail, but file data check should succeed
240 # because we always use root credential to OSTs
242 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
243 echo "destroyed credentials/contexs for $RUNAS_ID"
244 $RUNAS $CHECKSTAT -p 0666 $file && error "checkstat succeed"
246 wait $OPPID || error "read file data failed"
247 echo "read file data OK"
249 # restore and check again
251 echo "restored credentials for $RUNAS_ID"
252 $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat (2) error"
253 echo "$RUNAS_ID checkstat OK"
254 $CHECKSTAT -p 0666 $file || error "$UID checkstat (2) error"
255 echo "$UID checkstat OK"
256 $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat (2) error"
257 echo "$RUNAS_ID read file data OK"
259 run_test 3 "local cache under DLM lock"
262 local file1=$DIR/$tfile-1
263 local file2=$DIR/$tfile-2
265 ! $GSS_PIPEFS && skip "pipefs not used" && return
267 chmod 0777 $DIR || error "chmod $DIR failed"
268 # current access should be ok
269 $RUNAS touch $file1 || error "can't touch $file1"
270 [ -f $file1 ] || error "$file1 not found"
273 send_sigint client lgssd
275 check_gss_daemon_facet client lgssd && error "lgssd still running"
277 # flush context, and touch
278 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
279 $RUNAS touch $file2 &
281 echo "waiting touch pid $TOUCHPID"
282 wait $TOUCHPID && error "touch should fail"
285 do_facet client "$LGSSD -v"
287 check_gss_daemon_facet client lgssd
289 # touch new should succeed
290 $RUNAS touch $file2 || error "can't touch $file2"
291 [ -f $file2 ] || error "$file2 not found"
293 run_test 4 "lgssd dead, operations should wait timeout and fail"
296 local file1=$DIR/$tfile-1
297 local file2=$DIR/$tfile-2
298 local wait_time=$((TIMEOUT + TIMEOUT / 2))
300 chmod 0777 $DIR || error "chmod $DIR failed"
301 # current access should be ok
302 $RUNAS touch $file1 || error "can't touch $file1"
303 [ -f $file1 ] || error "$file1 not found"
306 send_sigint $(comma_list $(mdts_nodes)) lsvcgssd
308 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) lsvcgssd &&
309 error "lsvcgssd still running"
311 # flush context, and touch
312 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
313 $RUNAS touch $file2 &
317 echo "waiting $wait_time seconds for touch pid $TOUCHPID"
319 num=$(ps --no-headers -p $TOUCHPID | wc -l)
320 [ $num -eq 1 ] || error "touch already ended ($num)"
321 echo "process $TOUCHPID still hanging there... OK"
323 # restart lsvcgssd, expect touch to succeed
324 echo "restart lsvcgssd and recovering"
325 start_gss_daemons $(comma_list $(mdts_nodes)) "$LSVCGSSD -v"
327 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) lsvcgssd
328 wait $TOUCHPID || error "touch fail"
329 [ -f $file2 ] || error "$file2 not found"
331 run_test 5 "lsvcgssd dead, operations lead to recovery"
336 mkdir $DIR/d6 || error "mkdir $DIR/d6 failed"
337 for ((i=0; i<$nfile; i++)); do
338 dd if=/dev/zero of=$DIR/d6/file$i bs=8k count=1 ||
339 error "dd file$i failed"
341 ls -l $DIR/d6/* > /dev/null || error "ls failed"
342 rm -rf $DIR2/d6/* || error "rm failed"
343 rmdir $DIR2/d6/ || error "rmdir failed"
345 run_test 6 "test basic DLM callback works"
351 # for open(), the client only reserves space for a default stripe count
352 # lovea, and the server may return a larger lovea in its reply (because of
353 # a larger stripe count); the client then needs to call enlarge_reqbuf()
354 # and save the replied lovea in the request for possible future replay.
356 # Note: current script does NOT guarantee enlarge_reqbuf() will be in
357 # the path, however it does work in local test which has 2 OSTs and
358 # default stripe count is 1.
359 num_osts=$($LFS getstripe $MOUNT | egrep -c "^[0-9]*:.*ACTIVE")
360 echo "found $num_osts active OSTs"
361 [ $num_osts -lt 2 ] &&
362 echo "skipping $TESTNAME (must have >= 2 OSTs)" && return
364 mkdir $tdir || error "mkdir $tdir failed"
365 $LFS setstripe -c $num_osts $tdir || error "setstripe -c $num_osts"
368 for ((i = 0; i < 20; i++)); do
369 dd if=/dev/zero of=$tdir/f$i bs=4k count=16 2>/dev/null
372 for ((i = 0; i < 20; i++)); do
373 dd if=$tdir/f$i of=/dev/null bs=4k count=16 2>/dev/null
377 run_test 7 "exercise enlarge_reqbuf()"
381 local ATHISTORY=$(do_facet $SINGLEMDS "find /sys/ -name at_history")
382 local ATOLDBASE=$(do_facet $SINGLEMDS "cat $ATHISTORY")
384 do_facet $SINGLEMDS "echo 8 >> $ATHISTORY"
391 sysctl -w lnet.debug="+other"
393 # wait for the AT estimation to come down, this is faster
395 REQ_DELAY=$(lctl get_param -n \
396 mdc.${FSNAME}-MDT0000-mdc-*.timeouts |
397 awk '/portal 12/ {print $5}' | tail -1)
398 [ $REQ_DELAY -le 5 ] && break
399 echo "current AT estimation is $REQ_DELAY, wait a little bit"
402 REQ_DELAY=$((${REQ_DELAY} + ${REQ_DELAY} / 4 + 5))
404 # sleep sometime in ctx handle
405 do_facet $SINGLEMDS lctl set_param fail_val=$REQ_DELAY
406 #define OBD_FAIL_SEC_CTX_HDL_PAUSE 0x1204
407 do_facet $SINGLEMDS lctl set_param fail_loc=0x1204
409 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
411 $RUNAS touch $DIR/d8/f &
413 echo "waiting for touch (pid $TOUCHPID) to finish..."
414 sleep 2 # give it a chance to really trigger context init rpc
415 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
416 wait $TOUCHPID || error "touch should have succeeded"
418 $LCTL dk | grep "Early reply #" || error "No early reply"
421 do_facet $SINGLEMDS "echo $ATOLDBASE >> $ATHISTORY" || true
423 run_test 8 "Early reply sent for slow gss context negotiation"
426 # following tests will manipulate flavors and may end with any flavor set,
427 # so each test should not assume any start flavor.
431 if [ "$SLOW" = "no" ]; then
437 restore_to_default_flavor
438 set_rule $FSNAME any any krb5p
439 wait_flavor all2all krb5p
443 for ((n=0;n<$total;n++)); do
446 echo "flush ctx ($n/$total) ..."
447 $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
450 #sleep to let ctxs be re-established
454 run_test 90 "recoverable from losing contexts under load"
464 nrule_old=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
465 2>/dev/null | grep -c "$FSNAME.srpc.flavor.")
466 echo "original general rules: $nrule_old"
468 for ((i = $nrule_old; i < $max; i++)); do
469 set_rule $FSNAME ${NETTYPE}$i any krb5n || error "set rule $i"
471 for ((i = $nrule_old; i < $max; i++)); do
472 set_rule $FSNAME ${NETTYPE}$i any || error "remove rule $i"
475 nrule_new=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
476 2>/dev/null | grep -c "$FSNAME.srpc.flavor.")
477 if [ $nrule_new != $nrule_old ]; then
478 error "general rule: $nrule_new != $nrule_old"
482 # target-specific rules
484 nrule_old=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
485 2>/dev/null | grep -c "$FSNAME-MDT0000.srpc.flavor.")
486 echo "original target rules: $nrule_old"
488 for ((i = $nrule_old; i < $max; i++)); do
489 set_rule $FSNAME-MDT0000 ${NETTYPE}$i any krb5i || error "set rule $i"
491 for ((i = $nrule_old; i < $max; i++)); do
492 set_rule $FSNAME-MDT0000 ${NETTYPE}$i any || error "remove rule $i"
# Recount the MDT0000-specific sptlrpc rules after the add/remove cycle; the
# count must equal the value captured in nrule_old before the cycle.
# The pipe follows the redirection directly: the previous "\ |" escaped a
# space, which handed an extra " " argument to the do_facet command line.
495 nrule_new=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
496 2>/dev/null | grep -c "$FSNAME-MDT0000.srpc.flavor.")
497 if [ $nrule_new != $nrule_old ]; then
498 error "target rule: $nrule_new != $nrule_old"
501 run_test 99 "set large number of sptlrpc rules"
514 # started from default flavors
515 restore_to_default_flavor
517 # running dbench background
521 # all: null -> krb5n -> krb5a -> krb5i -> krb5p -> plain
523 set_rule $FSNAME any any krb5n
524 wait_flavor all2all krb5n || error_dbench "1"
527 set_rule $FSNAME any any krb5a
528 wait_flavor all2all krb5a || error_dbench "2"
531 set_rule $FSNAME any any krb5i
532 wait_flavor all2all krb5i || error_dbench "3"
535 set_rule $FSNAME any any krb5p
536 wait_flavor all2all krb5p || error_dbench "4"
539 set_rule $FSNAME any any plain
540 wait_flavor all2all plain || error_dbench "5"
549 set_rule $FSNAME any mdt2mdt krb5a
550 wait_flavor mdt2mdt krb5a || error_dbench "6"
553 set_rule $FSNAME any cli2mdt krb5i
554 wait_flavor cli2mdt krb5i || error_dbench "7"
557 set_rule $FSNAME any mdt2ost krb5p
558 wait_flavor mdt2ost krb5p || error_dbench "8"
561 set_rule $FSNAME any cli2ost krb5n
562 wait_flavor cli2ost krb5n || error_dbench "9"
569 # nothing should be changed because they are overridden by the dir rules above
571 set_rule $FSNAME-MDT0000 any any krb5p
572 set_rule $FSNAME-OST0000 any any krb5i
573 wait_flavor mdt2mdt krb5a || error_dbench "10"
574 wait_flavor cli2mdt krb5i || error_dbench "11"
576 wait_flavor mdt2ost krb5p || error_dbench "12"
577 wait_flavor cli2ost krb5n || error_dbench "13"
580 # delete all dir-specific rules
582 set_rule $FSNAME any mdt2mdt
583 set_rule $FSNAME any cli2mdt
584 set_rule $FSNAME any mdt2ost
585 set_rule $FSNAME any cli2ost
586 wait_flavor mdt2mdt krb5p $((MDSCOUNT - 1)) || error_dbench "14"
587 wait_flavor cli2mdt krb5p $(get_clients_mount_count) ||
590 wait_flavor mdt2ost krb5i $MDSCOUNT || error_dbench "16"
591 wait_flavor cli2ost krb5i $(get_clients_mount_count) ||
600 set_rule $FSNAME-MDT0000 any any
601 set_rule $FSNAME-OST0000 any any || error_dbench "18"
602 wait_flavor all2all plain || error_dbench "19"
607 run_test 100 "change security flavor on the fly under load"
613 local filename=$DIR/$tfile
618 # after setting flavor0, start multiop, which uses a flavor0 rpc, and let
619 # the server drop the reply; then switch to flavor1, and the resend should
620 # be completed using flavor1. This exercises the code that switches ctx/sec
621 # for a resend request.
623 log ">>>>>>>>>>>>>>> Testing $flavor0 -> $flavor1 <<<<<<<<<<<<<<<<<<<"
625 set_rule $FSNAME any cli2mdt $flavor0
626 wait_flavor cli2mdt $flavor0
627 rm -f $filename || error "remove old $filename failed"
630 #define OBD_FAIL_PTLRPC_DROP_REQ_OPC 0x513
631 do_facet $SINGLEMDS lctl set_param fail_val=36
632 do_facet $SINGLEMDS lctl set_param fail_loc=0x513
633 log "starting multiop"
634 $MULTIOP $filename m &
636 echo "multiop pid=$multiop_pid"
639 set_rule $FSNAME any cli2mdt $flavor1
640 wait_flavor cli2mdt $flavor1
642 num=$(ps --no-headers -p $multiop_pid 2>/dev/null | wc -l)
643 [ $num -eq 1 ] || error "multiop($multiop_pid) already ended ($num)"
644 echo "process $multiop_pid is still hanging there... OK"
646 do_facet $SINGLEMDS lctl set_param fail_loc=0
647 log "waiting for multiop ($multiop_pid) to finish"
648 wait $multiop_pid || error "multiop returned error"
653 # started from default flavors
654 restore_to_default_flavor
656 switch_sec_test null plain
657 switch_sec_test plain krb5n
658 switch_sec_test krb5n krb5a
659 switch_sec_test krb5a krb5i
660 switch_sec_test krb5i krb5p
661 switch_sec_test krb5p null
662 switch_sec_test null krb5p
663 switch_sec_test krb5p krb5i
664 switch_sec_test krb5i plain
665 switch_sec_test plain krb5p
667 run_test 101 "switch ctx/sec for resending request"
680 # started from default flavors
681 restore_to_default_flavor
683 # run dbench background
686 echo "Testing null->krb5n->krb5a->krb5i->krb5p->plain->null"
687 set_rule $FSNAME any any krb5n
688 set_rule $FSNAME any any krb5a
689 set_rule $FSNAME any any krb5i
690 set_rule $FSNAME any any krb5p
691 set_rule $FSNAME any any plain
692 set_rule $FSNAME any any null
695 wait_flavor all2all null || error_dbench "1"
698 echo "waiting for 15s and check again"
702 echo "Testing null->krb5i->null->krb5i->null..."
703 for ((i=0; i<10; i++)); do
704 set_rule $FSNAME any any krb5i
705 set_rule $FSNAME any any null
707 set_rule $FSNAME any any krb5i
710 wait_flavor all2all krb5i || error_dbench "2"
713 echo "waiting for 15s and check again"
719 run_test 102 "survive from insanely fast flavor switch"
724 local clients=$CLIENTS
726 [ -z $clients ] && clients=$HOSTNAME
728 # started from default flavors
729 restore_to_default_flavor
731 # at this time no rules have been set on the mgs; the mgc uses the null
732 # flavor to connect to the mgs.
733 count=$(flvr_cnt_mgc2mgs null)
734 [ $count -eq 1 ] || error "$count mgc connection use null flavor"
736 zconf_umount_clients $clients $MOUNT || return 1
738 # mount client with conflict flavor - should fail
739 mount_opts="${MOUNT_OPTS:+$MOUNT_OPTS,}mgssec=krb5p"
740 zconf_mount_clients $clients $MOUNT $mount_opts &&
741 error "mount with conflict flavor should have failed"
743 # mount client with same flavor - should succeed
744 mount_opts="${MOUNT_OPTS:+$MOUNT_OPTS,}mgssec=null"
745 zconf_mount_clients $clients $MOUNT $mount_opts ||
746 error "mount with same flavor should have succeeded"
747 zconf_umount_clients $clients $MOUNT || return 2
749 # mount client with default flavor - should succeed
750 zconf_mount_clients $clients $MOUNT ||
751 error "mount with default flavor should have succeeded"
753 run_test 150 "secure mgs connection: client flavor setting"
758 # set mgs only accept krb5p
759 set_rule _mgs any any krb5p
761 # umount everything, modules still loaded
764 # start gss daemon on mgs node
765 combined_mgs_mds || start_gss_daemons $mgs_HOST "$LSVCGSSD -v"
768 start mgs $(mgsdevname 1) $MDS_MOUNT_OPTS
770 # mount mgs with default flavor, in current framework it means mgs+mdt1.
771 # the connection of the mgc of mdt1 to the mgs is expected to fail.
772 DEVNAME=$(mdsdevname 1)
773 start mds1 $DEVNAME $MDS_MOUNT_OPTS
774 wait_mgc_import_state mds FULL 0 &&
775 error "mount with default flavor should have failed"
778 # mount with unauthorized flavor should fail
779 save_opts=$MDS_MOUNT_OPTS
780 if [ -z "$MDS_MOUNT_OPTS" ]; then
781 MDS_MOUNT_OPTS="-o mgssec=null"
783 MDS_MOUNT_OPTS="$MDS_MOUNT_OPTS,mgssec=null"
785 start mds1 $DEVNAME $MDS_MOUNT_OPTS
786 wait_mgc_import_state mds FULL 0 &&
787 error "mount with unauthorized flavor should have failed"
788 MDS_MOUNT_OPTS=$save_opts
791 # mount with designated flavor should succeed
792 save_opts=$MDS_MOUNT_OPTS
793 if [ -z "$MDS_MOUNT_OPTS" ]; then
794 MDS_MOUNT_OPTS="-o mgssec=krb5p"
796 MDS_MOUNT_OPTS="$MDS_MOUNT_OPTS,mgssec=krb5p"
798 start mds1 $DEVNAME $MDS_MOUNT_OPTS
799 wait_mgc_import_state mds FULL 0 ||
800 error "mount with designated flavor should have succeeded"
801 MDS_MOUNT_OPTS=$save_opts
805 run_test 151 "secure mgs connection: server flavor control"
808 check_and_cleanup_lustre