3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
6 # e.g. ONLY="22 23" or ONLY="`seq 32 39`" or EXCEPT="31"
11 LUSTRE=${LUSTRE:-$(dirname $0)/..}
12 . $LUSTRE/tests/test-framework.sh
16 ALWAYS_EXCEPT="$SANITY_GSS_EXCEPT"
18 [ "$SLOW" = "no" ] && EXCEPT_SLOW="100 101"
22 require_dsh_mds || exit 0
24 # $RUNAS_ID may get set incorrectly somewhere else
25 [ $UID -eq 0 -a $RUNAS_ID -eq 0 ] &&
26 error "RUNAS_ID set to 0, but UID is also 0!"
28 # remove $SEC, we'd like to control everything by ourselves
32 # global variables of this sanity
40 # Overwrite RUNAS command to use su - instead,
41 # to initialize the process more completely.
42 # This is required to get proper access to keyrings.
43 RUNAS="runas_su $(id -n -u $RUNAS_ID)"
# Probe for the Kerberos client tools with the shell builtin lookup, so that
# a host lacking the external `which` binary is not mistaken for a host
# lacking Kerberos.
46 command -v klist || skip "Kerberos env not setup"
47 command -v kinit || skip "Kerberos env not setup"
50 prepare_krb5_creds() {
51 echo prepare krb5 cred
53 $RUNAS krb5_login.sh || exit 1
59 # we want double mount
60 MOUNT_2=${MOUNT_2:-"yes"}
61 check_and_setup_lustre
63 rm -rf $DIR/[df][0-9]*
65 check_runas_id $RUNAS_ID $RUNAS_ID $RUNAS
69 local NPROC=$(grep -c ^processor /proc/cpuinfo)
70 [ $NPROC -gt 2 ] && NPROC=2
71 bash rundbench -D $DIR/$tdir $NPROC 1>/dev/null &
75 num=$(ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l)
76 if [ $num -ne 1 ]; then
77 error "failed to start dbench $NPROC"
79 echo "started dbench with $NPROC processes at background"
87 num=$(ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l)
88 if [ $num -eq 0 ]; then
89 echo "dbench $DBENCH_PID already finished"
# Reap the finished dbench and report the pid that was actually waited on
# (the message previously expanded $PID rather than $DBENCH_PID).
90 wait $DBENCH_PID || error "dbench $DBENCH_PID exit with error"
92 elif [ $num -ne 1 ]; then
94 error "found $num instance of pid $DBENCH_PID ???"
103 killall dbench 2>/dev/null
104 local num=$(ps --no-headers -p $DBENCH_PID | wc -l)
105 if [ $num -eq 0 ]; then
106 echo "dbench finished"
109 echo "dbench $DBENCH_PID is still running, waiting 2s..."
113 wait $DBENCH_PID || true
127 # obtain and cache Kerberos ticket-granting ticket
133 if [ -z "$myRUNAS" ]; then
134 error_exit "myRUNAS command must be specified for refresh_krb5_tgt"
137 CLIENTS=${CLIENTS:-$HOSTNAME}
138 do_nodes $CLIENTS "set -x
139 if ! $myRUNAS krb5_login.sh; then
140 echo "Failed to refresh Krb5 TGT for UID/GID $myRUNAS_UID/$myRUNAS_GID."
145 restore_krb5_cred() {
146 local keys=$(keyctl show | awk '$6 ~ "^lgssc:" {print $1}')
152 $RUNAS krb5_login.sh || exit 1
155 check_multiple_gss_daemons() {
158 local gssd_name=$(basename $gssd)
160 for ((i = 0; i < 10; i++)); do
161 do_facet $facet "$gssd -vvv"
164 # wait for the daemons to reach a "stable" state
167 local num=$(do_facet $facet ps -o cmd -C $gssd_name |
169 echo "$num instance(s) of $gssd_name are running"
171 if [ $num -ne 1 ]; then
172 error "$gssd_name not unique"
182 echo "bring up gss daemons..."
185 echo "check with someone already running..."
186 check_multiple_gss_daemons $my_facet $LSVCGSSD
188 echo "check with someone run & finished..."
189 do_facet $my_facet killall -q -2 lgssd $LSVCGSSD || true
190 sleep 5 # wait fully exit
191 check_multiple_gss_daemons $my_facet $LSVCGSSD
193 echo "check refresh..."
194 do_facet $my_facet killall -q -2 lgssd $LSVCGSSD || true
195 sleep 5 # wait fully exit
196 do_facet $my_facet ipcrm -S 0x3b92d473
197 check_multiple_gss_daemons $my_facet $LSVCGSSD
199 run_test 0 "start multiple gss daemons"
204 local file=$DIR/$tdir/$tfile
206 mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
207 chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
208 $RUNAS ls -ld $DIR/$tdir
211 $RUNAS $LFS flushctx -k -r $MOUNT || error "can't flush context"
212 $RUNAS touch $file && error "unexpected success"
216 $RUNAS touch $file || error "should not fail"
217 [ -f $file ] || error "$file not found"
219 run_test 1a "access with or without krb5 credential"
222 local file=$DIR/$tdir/$tfile
223 local lgssconf=/etc/request-key.d/lgssc.conf
224 local clients=$CLIENTS
# Fall back to the local host when no client list was given; the operand is
# quoted so a multi-word client list cannot break the test expression.
227 [ -z "$clients" ] && clients=$HOSTNAME
228 zconf_umount_clients $clients $MOUNT || error "umount clients failed"
230 echo "stop gss daemons..."
233 # get local realm from krb5.conf, assume the same for all nodes
234 realm=$(grep default_realm /etc/krb5.conf | awk '{print $3}')
236 # add -R option to lgss_keyring on local client
237 cp $lgssconf $TMP/lgssc.conf
238 stack_trap "yes | cp $TMP/lgssc.conf $lgssconf" EXIT
239 sed -i s+lgss_keyring+\&\ \-R\ $realm+ $lgssconf
241 # add -R option to lsvcgssd
242 echo "bring up gss daemons..."
243 start_gss_daemons '' '' "-R $realm"
244 stack_trap "stop_gss_daemons ; start_gss_daemons" EXIT
246 zconf_mount_clients $clients $MOUNT || error "mount clients failed"
248 mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
249 chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
250 $RUNAS touch $file || error "touch $file failed"
251 [ -f $file ] || error "$file not found"
253 run_test 1b "Use specified realm"
256 local file1=$DIR/$tdir/$tfile-1
257 local file2=$DIR/$tdir/$tfile-2
259 mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
260 chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
262 # current access should be ok
263 $RUNAS touch $file1 || error "can't touch $file1"
264 [ -f $file1 ] || error "$file1 not found"
266 # cleanup all cred/ctx and touch
267 $RUNAS $LFS flushctx -k -r $MOUNT || error "can't flush context"
268 $RUNAS touch $file2 && error "unexpected success"
272 $RUNAS touch $file2 || error "should not fail"
273 [ -f $file2 ] || error "$file2 not found"
275 run_test 2 "lfs flushctx"
278 local file=$DIR/$tdir/$tfile
280 mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
281 chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
284 echo "aaaaaaaaaaaaaaaaa" > $file
286 $CHECKSTAT -p 0666 $file || error "$UID checkstat error"
287 $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat error"
288 $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat error"
291 $RUNAS $MULTIOP $file o_r &
293 # wait for multiop to finish its open()
296 # cleanup all cred/ctx and check
297 # metadata check should fail, but file data check should succeed
298 # because we always use root credential to OSTs
299 $RUNAS $LFS flushctx -k -r $MOUNT || error "can't flush context"
300 echo "destroyed credentials/contexs for $RUNAS_ID"
301 $RUNAS $CHECKSTAT -p 0666 $file && error "checkstat succeed"
302 kill -s 10 $(pgrep -u $USER0 $MULTIOP)
303 wait $OPPID || error "read file data failed"
304 echo "read file data OK"
306 # restore and check again
308 echo "restored credentials for $RUNAS_ID"
309 $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat (2) error"
310 echo "$RUNAS_ID checkstat OK"
311 $CHECKSTAT -p 0666 $file || error "$UID checkstat (2) error"
312 echo "$UID checkstat OK"
313 $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat (2) error"
314 echo "$RUNAS_ID read file data OK"
316 run_test 3 "local cache under DLM lock"
319 local file1=$DIR/$tdir/$tfile-1
320 local file2=$DIR/$tdir/$tfile-2
321 local file3=$DIR/$tdir/$tfile-3
322 local wait_time=$((TIMEOUT + TIMEOUT / 2))
324 mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
325 chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
327 # current access should be ok
328 $RUNAS touch $file1 || error "can't touch $file1"
329 [ -f $file1 ] || error "$file1 not found"
332 $RUNAS $LFS flushctx $MOUNT || error "can't flush context (1)"
335 send_sigint $(comma_list $(mdts_nodes)) $LSVCGSSD
337 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) $LSVCGSSD &&
338 error "$LSVCGSSD still running (1)"
340 # daemon should restart automatically, at least on newer servers
342 if [ $? -ne 0 ]; then
343 echo "$RUNAS touch $file2 failed"
344 (( MDS1_VERSION < $(version_code 2.15.61) )) ||
345 error "$LSVCGSSD should restart automatically"
347 echo "$RUNAS touch $file2 succeeded"
351 if (( MDS1_VERSION >= $(version_code 2.15.61) )); then
352 $RUNAS $LFS flushctx $MOUNT || error "can't flush context (2)"
356 send_sigint $(comma_list $(mdts_nodes)) $LSVCGSSD
358 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) $LSVCGSSD &&
359 error "$LSVCGSSD still running (2)"
361 # restart lsvcgssd, expect touch succeed
362 echo "restart $LSVCGSSD and recovering"
363 start_gss_daemons $(comma_list $(mdts_nodes)) $LSVCGSSD "-vvv"
365 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) $LSVCGSSD
366 $RUNAS touch $file3 || error "should not fail now"
367 [ -f $file3 ] || error "$file3 not found"
369 run_test 5 "lsvcgssd dead, operations pass"
374 mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
375 for ((i=0; i<$nfile; i++)); do
376 dd if=/dev/zero of=$DIR/$tdir/$tfile-$i bs=8k count=1 ||
377 error "dd $tfile-$i failed"
379 ls -l $DIR/$tdir/* > /dev/null || error "ls failed"
380 rm -rf $DIR2/$tdir/* || error "rm failed"
381 rmdir $DIR2/$tdir || error "rmdir failed"
383 run_test 6 "test basic DLM callback works"
388 # for open(), the client only reserves space for a default stripe count lovea,
389 # and the server may return a larger lovea in the reply (because of a larger
390 # stripe count); the client then needs to call enlarge_reqbuf() and save the
391 # replied lovea in the request for a possible future replay.
393 # Note: the current script does NOT guarantee that enlarge_reqbuf() will be
394 # in the path; however, it does work in a local test which has 2 OSTs and
395 # a default stripe count of 1.
396 [[ $OSTCOUNT -ge 2 ]] || skip_env "needs >= 2 OSTs"
398 mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
399 $LFS setstripe -c $OSTCOUNT $DIR/$tdir || error "setstripe -c $OSTCOUNT"
402 for ((i = 0; i < 20; i++)); do
403 dd if=/dev/zero of=$DIR/$tdir/f$i bs=4k count=16 2>/dev/null
406 for ((i = 0; i < 20; i++)); do
407 dd if=$DIR/$tdir/f$i of=/dev/null bs=4k count=16 2>/dev/null
410 run_test 7 "exercise enlarge_reqbuf()"
414 local atoldbase=$(do_facet $SINGLEMDS "$LCTL get_param -n at_history")
417 do_facet $SINGLEMDS "$LCTL set_param at_history=8" || true
419 "do_facet $SINGLEMDS $LCTL set_param at_history=$atoldbase" EXIT
425 $RUNAS keyctl show @u
426 echo Flushing gss ctxs
427 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
428 $RUNAS keyctl show @u
432 stack_trap debugrestore EXIT
433 $LCTL set_param debug=+other
435 # wait for the AT estimation to come down, this is faster
437 req_delay=$($LCTL get_param -n \
438 mdc.${FSNAME}-MDT0000-mdc-*.timeouts |
439 awk '/portal 12/ {print $5}' | tail -1)
440 [ $req_delay -le 5 ] && break
441 echo "current AT estimation is $req_delay, wait a little bit"
444 req_delay=$((${req_delay} + ${req_delay} / 4 + 5))
446 # sleep for some time in ctx handle
447 do_facet $SINGLEMDS $LCTL set_param fail_val=$req_delay
448 #define OBD_FAIL_SEC_CTX_HDL_PAUSE 0x00001204
449 #define CFS_FAIL_ONCE 0x80000000
450 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001204
452 $RUNAS touch $DIR/$tdir/$tfile &
454 echo "waiting for touch (pid $TOUCHPID) to finish..."
455 wait $TOUCHPID || error "touch should have succeeded"
456 $RUNAS keyctl show @u
458 $LCTL dk | grep -i "Early reply #" || error "No early reply"
460 run_test 8 "Early reply sent for slow gss context negotiation"
463 # following tests will manipulate flavors and may end with any flavor set,
464 # so each test should not assume any start flavor.
468 if [ "$SLOW" = "no" ]; then
476 restore_to_default_flavor
481 for ((n = 1; n <= $total; n++)); do
484 echo "flush ctx ($n/$total) ..."
485 $LFS flushctx -k -r $MOUNT ||
486 error "can't flush context on $MOUNT"
489 # sleep to let ctxs be re-established
493 run_test 90 "recoverable from losing contexts under load"
503 nrule_old=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
504 2>/dev/null | grep -c "$FSNAME.srpc.flavor.")
505 echo "original general rules: $nrule_old"
507 for ((i = $nrule_old; i < $max; i++)); do
508 set_rule $FSNAME ${NETTYPE}$i cli2mdt krb5n ||
509 error "set rule $i (1)"
510 set_rule $FSNAME ${NETTYPE}$i cli2ost krb5n ||
511 error "set rule $i (2)"
512 set_rule $FSNAME ${NETTYPE}$i mdt2ost null ||
513 error "set rule $i (3)"
514 set_rule $FSNAME ${NETTYPE}$i mdt2mdt null ||
515 error "set rule $i (4)"
517 for ((i = $nrule_old; i < $max; i++)); do
518 set_rule $FSNAME ${NETTYPE}$i cli2mdt ||
519 error "remove rule $i (1)"
520 set_rule $FSNAME ${NETTYPE}$i cli2ost ||
521 error "remove rule $i (2)"
522 set_rule $FSNAME ${NETTYPE}$i mdt2ost ||
523 error "remove rule $i (3)"
524 set_rule $FSNAME ${NETTYPE}$i mdt2mdt ||
525 error "remove rule $i (4)"
529 nrule_new=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
530 2>/dev/null | grep -c "$FSNAME.srpc.flavor.")
531 if [ $nrule_new != $nrule_old ]; then
532 error "general rule: $nrule_new != $nrule_old"
536 # target-specific rules
538 nrule_old=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
539 2>/dev/null | grep -c "$FSNAME-MDT0000.srpc.flavor.")
540 echo "original target rules: $nrule_old"
542 for ((i = $nrule_old; i < $max; i++)); do
543 set_rule $FSNAME-MDT0000 ${NETTYPE}$i cli2mdt krb5i ||
544 error "set new rule $i (1)"
545 set_rule $FSNAME-MDT0000 ${NETTYPE}$i mdt2ost null ||
546 error "set new rule $i (2)"
547 set_rule $FSNAME-MDT0000 ${NETTYPE}$i mdt2mdt null ||
548 error "set new rule $i (3)"
550 for ((i = $nrule_old; i < $max; i++)); do
551 set_rule $FSNAME-MDT0000 ${NETTYPE}$i cli2mdt ||
552 error "remove new rule $i (1)"
553 set_rule $FSNAME-MDT0000 ${NETTYPE}$i mdt2ost ||
554 error "remove new rule $i (2)"
555 set_rule $FSNAME-MDT0000 ${NETTYPE}$i mdt2mdt ||
556 error "remove new rule $i (3)"
# Recount the target-specific rules after removal and compare with the
# original count. The pipe must not be preceded by an escaped blank, which
# would be handed to do_facet as an extra argument; the message names the
# target rules being checked here rather than the general ones.
559 nrule_new=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
560 2>/dev/null | grep -c "$FSNAME-MDT0000.srpc.flavor.")
561 if [ $nrule_new != $nrule_old ]; then
562 error "target rule: $nrule_new != $nrule_old"
565 run_test 99 "set large number of sptlrpc rules"
568 # started from default flavors
569 restore_to_default_flavor
573 # running dbench in background
577 # all: null -> krb5n -> krb5a -> krb5i -> krb5p
595 # nothing should be changed because they are overridden by above rules
597 set_rule $FSNAME-MDT0000 any cli2mdt krb5a
598 set_rule $FSNAME-OST0000 any cli2ost krb5i
599 wait_flavor cli2mdt krb5p || error_dbench "1"
601 wait_flavor cli2ost krb5p || error_dbench "2"
608 set_rule $FSNAME-MDT0000 any cli2mdt
609 set_rule $FSNAME-OST0000 any cli2ost
615 set_rule $FSNAME any mdt2mdt
616 set_rule $FSNAME any cli2mdt
617 set_rule $FSNAME any mdt2ost
618 set_rule $FSNAME any cli2ost
619 restore_to_default_flavor
624 run_test 100 "change security flavor on the fly under load"
630 local filename=$DIR/$tfile
635 # after setting flavor0, start multiop which uses flavor0 rpc, and let
636 # server drop the reply; then switch to flavor1, the resend should be
637 # completed using flavor1. This exercises the code that switches ctx/sec
638 # for a resent request.
640 log ">>>>>>>>>>>>>>> Testing $flavor0 -> $flavor1 <<<<<<<<<<<<<<<<<<<"
642 set_rule $FSNAME any cli2mdt $flavor0
643 wait_flavor cli2mdt $flavor0
644 rm -f $filename || error "remove old $filename failed"
647 #define OBD_FAIL_PTLRPC_DROP_REQ_OPC 0x513
648 do_facet $SINGLEMDS lctl set_param fail_val=36
649 do_facet $SINGLEMDS lctl set_param fail_loc=0x513
650 log "starting multiop"
651 $MULTIOP $filename m &
653 echo "multiop pid=$multiop_pid"
656 set_rule $FSNAME any cli2mdt $flavor1
657 wait_flavor cli2mdt $flavor1
659 num=$(ps --no-headers -p $multiop_pid 2>/dev/null | wc -l)
660 [ $num -eq 1 ] || error "multiop($multiop_pid) already ended ($num)"
661 echo "process $multiop_pid is still hanging there... OK"
663 do_facet $SINGLEMDS lctl set_param fail_loc=0
664 log "waiting for multiop ($multiop_pid) to finish"
665 wait $multiop_pid || error "multiop returned error"
670 # started from default flavors
671 restore_to_default_flavor
673 switch_sec_test null krb5n
674 switch_sec_test krb5n krb5a
675 switch_sec_test krb5a krb5i
676 switch_sec_test krb5i krb5p
677 switch_sec_test krb5p null
679 run_test 101 "switch ctx/sec for resending request"
692 # started from default flavors
693 restore_to_default_flavor
697 # run dbench in the background
700 echo "Testing null->krb5n->krb5a->krb5i->krb5p->null"
709 echo "waiting for 15s and check again"
713 echo "Testing null->krb5i->null->krb5i->null..."
714 for ((idx = 0; idx < 5; idx++)); do
722 echo "waiting for 15s and check again"
728 run_test 102 "survive from fast flavor switch"
733 local clients=$CLIENTS
# Fall back to the local host when no client list was given; the operand is
# quoted so a multi-word client list cannot break the test expression.
735 [ -z "$clients" ] && clients=$HOSTNAME
737 # started from default flavors
738 restore_to_default_flavor
740 # at this time no rules have been set on mgs; mgc uses null
741 # flavor to connect to mgs
742 count=$(flvr_cnt_mgc2mgs null)
743 [ $count -eq 1 ] || error "$count mgc connections use null flavor"
745 zconf_umount_clients $clients $MOUNT || error "umount failed (1)"
747 # mount client with conflicting flavor - should fail
748 mount_opts="${MOUNT_OPTS:+$MOUNT_OPTS,}mgssec=krb5p"
749 zconf_mount_clients $clients $MOUNT $mount_opts &&
750 error "mount with conflict flavor should have failed"
752 # mount client with same flavor - should succeed
753 mount_opts="${MOUNT_OPTS:+$MOUNT_OPTS,}mgssec=null"
754 zconf_mount_clients $clients $MOUNT $mount_opts ||
755 error "mount with same flavor should have succeeded"
756 zconf_umount_clients $clients $MOUNT || error "umount failed (2)"
758 # mount client with default flavor - should succeed
759 zconf_mount_clients $clients $MOUNT ||
760 error "mount with default flavor should have succeeded"
762 run_test 150 "secure mgs connection: client flavor setting"
766 set_rule _mgs any any
768 # umount everything, then remount
776 stack_trap exit_151 EXIT
778 # set mgs rule to only accept krb5p
779 set_rule _mgs any any krb5p
781 # umount everything, modules still loaded
784 # start gss daemon on mgs node
785 combined_mgs_mds || start_gss_daemons $mgs_HOST $LSVCGSSD "-vvv"
788 start mgs $(mgsdevname 1) $MDS_MOUNT_OPTS
790 # mount with default flavor, expected to fail
791 start ost1 "$(ostdevname 1)" $OST_MOUNT_OPTS
792 wait_mgc_import_state ost1 FULL 0 &&
793 error "mount with default flavor should have failed"
796 # mount with unauthorized flavor should fail
797 if [ -z "$OST_MOUNT_OPTS" ]; then
798 new_opts="-o mgssec=null"
800 new_opts="$OST_MOUNT_OPTS,mgssec=null"
802 start ost1 "$(ostdevname 1)" $new_opts
803 wait_mgc_import_state ost1 FULL 0 &&
804 error "mount with unauthorized flavor should have failed"
807 # mount with designated flavor should succeed
808 if [ -z "$OST_MOUNT_OPTS" ]; then
809 new_opts="-o mgssec=krb5p"
811 new_opts="$OST_MOUNT_OPTS,mgssec=krb5p"
813 start ost1 "$(ostdevname 1)" $new_opts
814 wait_mgc_import_state ost1 FULL 0 ||
815 error "mount with designated flavor should have succeeded"
819 run_test 151 "secure mgs connection: server flavor control"
821 complete_test $SECONDS
824 check_and_cleanup_lustre