3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
6 # e.g. ONLY="22 23" or ONLY="`seq 32 39`" or EXCEPT="31"
# Suite bootstrap: exclusion list, helper binary, framework/config sourcing,
# sanity checks and the Kerberos credential-cache paths used further down.
# NOTE(review): this listing is elided; statements of the original setup that
# sit between the lines below are not visible here.
10 # bug number for skipped test:
# Default the always-skipped list to $SANITY_GSS_EXCEPT when unset or empty.
11 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"$SANITY_GSS_EXCEPT"}
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
17 export MULTIOP=${MULTIOP:-multiop}
# Default $LUSTRE to the parent of the directory holding this script.
19 LUSTRE=${LUSTRE:-$(dirname $0)/..}
20 . $LUSTRE/tests/test-framework.sh
# ':=' also assigns the default path to $CONFIG before it is sourced.
22 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Leave with status 0 (not a failure) when require_dsh_mds does not succeed.
25 require_dsh_mds || exit 0
# Tests 100 and 101 are listed as slow when SLOW=no.
27 [ "$SLOW" = "no" ] && EXCEPT_SLOW="100 101"
29 # $RUNAS_ID may get set incorrectly somewhere else
30 [ $UID -eq 0 -a $RUNAS_ID -eq 0 ] &&
31 error "RUNAS_ID set to 0, but UID is also 0!"
33 # remove $SEC, we'd like to control everything by ourselves
37 # global variables of this sanity
# Credential cache of the $RUNAS_ID user, and the path of its saved copy.
40 KRB5_CRED=$KRB5_CCACHE_DIR/krb5cc_$RUNAS_ID
41 KRB5_CRED_SAVE=$KRB5_CCACHE_DIR/krb5cc.sanity.save
# Run krb5_login.sh through $RUNAS and save a copy of the resulting credential
# cache ($KRB5_CRED) to $KRB5_CRED_SAVE.
# Exits the script with status 1 if krb5_login.sh fails, and with status 2 if
# $KRB5_CRED is not a regular file afterwards.
# NOTE(review): the closing brace is not visible in this elided listing.
48 prepare_krb5_creds() {
49 echo prepare krb5 cred
52 $RUNAS krb5_login.sh || exit 1
53 [ -f $KRB5_CRED ] || exit 2
55 cp $KRB5_CRED $KRB5_CRED_SAVE
# File system setup for the suite: request a second mount, bring Lustre up,
# clear leftovers from earlier runs and validate the $RUNAS identity.
60 # we want double mount
61 MOUNT_2=${MOUNT_2:-"yes"}
62 check_and_setup_lustre
# Remove entries under $DIR whose names start with 'd' or 'f' plus a digit.
64 rm -rf $DIR/[df][0-9]*
66 check_runas_id $RUNAS_ID $RUNAS_ID $RUNAS
# Launch dbench in the background and verify that it is running.
# NOTE(review): the function header, the assignment of $DBENCH_PID
# (presumably from $! after the background launch - confirm) and the closing
# 'fi' are on lines not visible in this elided listing.
# Use one dbench process per CPU listed in /proc/cpuinfo, capped at 2.
72 local NPROC=$(grep -c ^processor /proc/cpuinfo)
73 [ $NPROC -gt 2 ] && NPROC=2
# stdout of rundbench is discarded; stderr is left attached.
74 sh rundbench $NPROC 1>/dev/null &
# ps prints one line per matching pid, so anything other than 1 means the
# process is not there.
78 num=$(ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l)
79 if [ $num -ne 1 ]; then
80 error "failed to start dbench $NPROC"
82 echo "started dbench with $NPROC processes at background"
# Check on the background dbench identified by $DBENCH_PID.
# - No process with that pid: report it as finished, reap it with 'wait' and
#   raise an error if its exit status was non-zero.
# - Any count other than 0 or 1: raise an error, since a pid cannot match
#   more than one process.
# The error message names $DBENCH_PID; it previously expanded the undefined
# $PID, so the failing pid was never reported.
# NOTE(review): the function header and the lines between/after these
# branches (including the closing 'fi') are not visible in this listing.
90 num=$(ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l)
91 if [ $num -eq 0 ]; then
92 echo "dbench $DBENCH_PID already finished"
93 wait $DBENCH_PID || error "dbench $DBENCH_PID exit with error"
95 elif [ $num -ne 1 ]; then
97 error "found $num instance of pid $DBENCH_PID ???"
# Stop the background dbench and reap it.
# NOTE(review): the function header, the enclosing loop (if any) and the
# 'else'/'fi'/sleep lines are not visible in this elided listing; the
# "waiting 2s" message suggests a retry loop - confirm.
# Errors from killall (e.g. no dbench running) are discarded.
106 killall dbench 2>/dev/null
107 local num=$(ps --no-headers -p $DBENCH_PID | wc -l)
108 if [ $num -eq 0 ]; then
109 echo "dbench finished"
112 echo "dbench $DBENCH_PID is still running, waiting 2s..."
# Reap the child; its exit status is deliberately ignored.
116 wait $DBENCH_PID || true
# Put the saved credential cache ($KRB5_CRED_SAVE) back at $KRB5_CRED and make
# it owned by, and readable/writable only for, the $RUNAS_ID user.
# NOTE(review): the closing brace is not visible in this elided listing.
120 restore_krb5_cred() {
122 cp $KRB5_CRED_SAVE $KRB5_CRED
123 chown $RUNAS_ID:$RUNAS_ID $KRB5_CRED
124 chmod 0600 $KRB5_CRED
# Start the same GSS daemon ten times on a facet and verify that exactly one
# instance ends up running; raises an error otherwise.
# NOTE(review): $facet and $gssd are assigned on lines not visible here;
# callers pass "<facet> <daemon command>", so they are presumably $1 and $2 -
# confirm. The loop's 'done', the rest of the ps pipeline, the 'fi' and the
# closing brace are also elided.
127 check_multiple_gss_daemons() {
130 local gssd_name=$(basename $gssd)
# Launch the daemon repeatedly with -v on the target facet.
132 for ((i = 0; i < 10; i++)); do
133 do_facet $facet "$gssd -v"
136 # wait daemons entering "stable" status
# Count the daemon's processes on the facet (pipeline continues on an elided
# line).
139 local num=$(do_facet $facet ps -o cmd -C $gssd_name |
141 echo "$num instance(s) of $gssd_name are running"
143 if [ $num -ne 1 ]; then
144 error "$gssd_name not unique"
# Test 0: GSS daemons must stay unique however often they are started.
# check_multiple_gss_daemons is run for $LSVCGSSD and then $LGSSD in three
# situations: daemons already running, daemons stopped, and daemons stopped
# with semaphores removed via ipcrm.
# NOTE(review): the function header and the assignment of $my_facet are not
# visible in this elided listing.
154 echo "bring up gss daemons..."
157 echo "check with someone already running..."
158 check_multiple_gss_daemons $my_facet $LSVCGSSD
160 check_multiple_gss_daemons $my_facet $LGSSD
163 echo "check with someone run & finished..."
# Signal 2 is SIGINT; -q keeps killall quiet when no process matches, and the
# '|| true' ignores its exit status.
164 do_facet $my_facet killall -q -2 lgssd lsvcgssd || true
165 sleep 5 # wait fully exit
166 check_multiple_gss_daemons $my_facet $LSVCGSSD
168 check_multiple_gss_daemons $my_facet $LGSSD
171 echo "check refresh..."
172 do_facet $my_facet killall -q -2 lgssd lsvcgssd || true
173 sleep 5 # wait fully exit
# Remove a semaphore by key before each restart.
# NOTE(review): the two keys differ in one hex digit (0x3b92d473 vs
# 0x3a92d473); presumably one per daemon - confirm.
174 do_facet $my_facet ipcrm -S 0x3b92d473
175 check_multiple_gss_daemons $my_facet $LSVCGSSD
177 do_facet $my_facet ipcrm -S 0x3a92d473
178 check_multiple_gss_daemons $my_facet $LGSSD
181 run_test 0 "start multiple gss daemons"
# Test 1: creating a file as the $RUNAS user must fail after its context is
# flushed and succeed again afterwards.
# NOTE(review): the function header and the lines that destroy/restore the
# Kerberos credential around the two touch attempts are not visible in this
# elided listing.
186 local file=$DIR/$tfile
188 chmod 0777 $DIR || error "chmod $DIR failed"
192 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
# Expected to fail here; success is the error case.
193 $RUNAS touch $file && error "unexpected success"
197 $RUNAS touch $file || error "should not fail"
198 [ -f $file ] || error "$file not found"
200 run_test 1 "access with or without krb5 credential"
# Test 2: 'lfs flushctx' run as the $RUNAS user drops its context, so a
# following touch fails until access is re-established.
# NOTE(review): the function header and the lines that destroy/restore the
# Kerberos credential are not visible in this elided listing.
203 local file1=$DIR/$tfile-1
204 local file2=$DIR/$tfile-2
206 chmod 0777 $DIR || error "chmod $DIR failed"
207 # current access should be ok
208 $RUNAS touch $file1 || error "can't touch $file1"
209 [ -f $file1 ] || error "$file1 not found"
211 # cleanup all cred/ctx and touch
213 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
# Expected to fail here; success is the error case.
214 $RUNAS touch $file2 && error "unexpected success"
218 $RUNAS touch $file2 || error "should not fail"
219 [ -f $file2 ] || error "$file2 not found"
221 run_test 2 "lfs flushctx"
# Test 3: with the $RUNAS user's context flushed, a metadata check must fail
# while a read through an already-open file still completes; after restore
# both work again.
# NOTE(review): the function header, the chmod that makes the file 0666, the
# assignment of $OPPID (presumably $! of the multiop below - confirm), the
# signal that lets multiop proceed and the credential destroy/restore calls
# are on lines not visible in this elided listing.
224 local file=$DIR/$tfile
227 echo "aaaaaaaaaaaaaaaaa" > $file
# Baseline: both root ($UID) and $RUNAS_ID see mode 0666 and can read.
229 $CHECKSTAT -p 0666 $file || error "$UID checkstat error"
230 $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat error"
231 $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat error"
# Background multiop with the 'o_r' command string, run as $RUNAS.
234 $RUNAS $MULTIOP $file o_r &
236 # wait for multiop to finish its open()
239 # cleanup all cred/ctx and check
240 # metadata check should fail, but file data check should succeed
241 # because we always use root credential to OSTs
243 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
244 echo "destroyed credentials/contexs for $RUNAS_ID"
# Expected to fail here; success is the error case.
245 $RUNAS $CHECKSTAT -p 0666 $file && error "checkstat succeed"
# multiop's exit status tells whether its read of the file data worked.
247 wait $OPPID || error "read file data failed"
248 echo "read file data OK"
250 # restore and check again
252 echo "restored credentials for $RUNAS_ID"
253 $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat (2) error"
254 echo "$RUNAS_ID checkstat OK"
255 $CHECKSTAT -p 0666 $file || error "$UID checkstat (2) error"
256 echo "$UID checkstat OK"
257 $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat (2) error"
258 echo "$RUNAS_ID read file data OK"
260 run_test 3 "local cache under DLM lock"
# Test 4: with lgssd stopped on the client, an operation that needs a new
# context must fail; after lgssd is restarted it must succeed.
# NOTE(review): the function header, the assignment of $TOUCHPID (presumably
# $! of the background touch - confirm) and any sleeps between the steps are
# on lines not visible in this elided listing.
263 local file1=$DIR/$tfile-1
264 local file2=$DIR/$tfile-2
# Only meaningful when pipefs is in use; otherwise skip the test.
266 ! $GSS_PIPEFS && skip "pipefs not used" && return
268 chmod 0777 $DIR || error "chmod $DIR failed"
269 # current access should be ok
270 $RUNAS touch $file1 || error "can't touch $file1"
271 [ -f $file1 ] || error "$file1 not found"
# Stop lgssd on the client and confirm that it is gone.
274 send_sigint client lgssd
276 check_gss_daemon_facet client lgssd && error "lgssd still running"
278 # flush context, and touch
279 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
280 $RUNAS touch $file2 &
282 echo "waiting touch pid $TOUCHPID"
# A zero exit status from the background touch is the error case.
283 wait $TOUCHPID && error "touch should fail"
# Bring lgssd back and verify that it is running.
286 do_facet client "$LGSSD -v"
288 check_gss_daemon_facet client lgssd
290 # touch new should succeed
291 $RUNAS touch $file2 || error "can't touch $file2"
292 [ -f $file2 ] || error "$file2 not found"
294 run_test 4 "lgssd dead, operations should wait timeout and fail"
# Test 5: with lsvcgssd stopped on every MDT node, an operation that needs a
# new context must fail; after lsvcgssd is restarted it must succeed.
# NOTE(review): the function header and any sleeps between the steps are on
# lines not visible in this elided listing; $wait_time is computed below but
# its use is not visible here either.
297 local file1=$DIR/$tfile-1
298 local file2=$DIR/$tfile-2
# 1.5 x $TIMEOUT (integer arithmetic).
299 local wait_time=$((TIMEOUT + TIMEOUT / 2))
301 chmod 0777 $DIR || error "chmod $DIR failed"
302 # current access should be ok
303 $RUNAS touch $file1 || error "can't touch $file1"
304 [ -f $file1 ] || error "$file1 not found"
# Stop lsvcgssd on all MDT nodes and confirm that it is gone.
307 send_sigint $(comma_list $(mdts_nodes)) lsvcgssd
309 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) lsvcgssd &&
310 error "lsvcgssd still running"
312 # flush context, and touch
313 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
314 $RUNAS touch $file2 && error 'should fail without lsvcgssd'
316 # restart lsvcgssd, expect touch to succeed
317 echo "restart lsvcgssd and recovering"
318 start_gss_daemons $(comma_list $(mdts_nodes)) "$LSVCGSSD -v"
320 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) lsvcgssd
322 $RUNAS touch $file2 || error 'should not fail now'
323 [ -f $file2 ] || error "$file2 not found"
325 run_test 5 "lsvcgssd dead, operations fail"
# Test 6: create files through the first mount ($DIR), list them, then remove
# them through the second mount ($DIR2).
# NOTE(review): the function header, the assignment of $nfile and the loop's
# 'done' are on lines not visible in this elided listing.
330 mkdir $DIR/d6 || error "mkdir $DIR/d6 failed"
# One 8k file per iteration.
331 for ((i=0; i<$nfile; i++)); do
332 dd if=/dev/zero of=$DIR/d6/file$i bs=8k count=1 ||
333 error "dd file$i failed"
335 ls -l $DIR/d6/* > /dev/null || error "ls failed"
# Removal goes through the other mount point of the same file system.
336 rm -rf $DIR2/d6/* || error "rm failed"
337 rmdir $DIR2/d6/ || error "rmdir failed"
339 run_test 6 "test basic DLM callback works"
# Test 7: write and read back 20 files in a directory striped over all active
# OSTs, to exercise the request-buffer enlargement described below.
# NOTE(review): the function header, the assignment of $tdir, the loops'
# 'done' lines and the cleanup are not visible in this elided listing.
345 # for open(), client only reserve space for default stripe count lovea,
346 # and server may return larger lovea in reply (because of larger stripe
347 # count), client need call enlarge_reqbuf() and save the replied lovea
348 # in request for future possible replay.
350 # Note: current script does NOT guarantee enlarge_reqbuf() will be in
351 # the path, however it does work in local test which has 2 OSTs and
352 # default stripe count is 1.
# Count 'lfs getstripe' output lines of the form "<digits>:...ACTIVE".
353 num_osts=$($LFS getstripe $MOUNT | egrep -c "^[0-9]*:.*ACTIVE")
354 echo "found $num_osts active OSTs"
# With fewer than 2 OSTs, print a skip message and return.
355 [ $num_osts -lt 2 ] &&
356 echo "skipping $TESTNAME (must have >= 2 OSTs)" && return
358 mkdir $tdir || error "mkdir $tdir failed"
359 $LFS setstripe -c $num_osts $tdir || error "setstripe -c $num_osts"
# Write 20 files of 16 x 4k blocks; dd's stderr is discarded.
362 for ((i = 0; i < 20; i++)); do
363 dd if=/dev/zero of=$tdir/f$i bs=4k count=16 2>/dev/null
# Read the same files back.
366 for ((i = 0; i < 20; i++)); do
367 dd if=$tdir/f$i of=/dev/null bs=4k count=16 2>/dev/null
371 run_test 7 "exercise enlarge_reqbuf()"
# Test 8: delay context handling on the MDS via a fail_loc and verify that a
# touch as $RUNAS still succeeds and that the client debug log records an
# early reply.
# NOTE(review): the function header, the mkdir of $DIR/d8, the loop around
# the AT-estimate poll, the assignment of $TOUCHPID (presumably $! of the
# background touch - confirm) and cleanup lines are not visible in this
# elided listing.
# Locate the at_history file under /sys on the MDS and remember its value.
375 local ATHISTORY=$(do_facet $SINGLEMDS "find /sys/ -name at_history")
376 local ATOLDBASE=$(do_facet $SINGLEMDS "cat $ATHISTORY")
378 do_facet $SINGLEMDS "echo 8 >> $ATHISTORY"
# Add 'other' to the local lnet debug mask.
385 sysctl -w lnet.debug="+other"
387 # wait for the at estimation come down, this is faster
# Take field 5 of the last "portal 12" line of the MDC timeouts.
389 REQ_DELAY=$(lctl get_param -n \
390 mdc.${FSNAME}-MDT0000-mdc-*.timeouts |
391 awk '/portal 12/ {print $5}' | tail -1)
# Leaves the (elided) polling loop once the estimate is at most 5.
392 [ $REQ_DELAY -le 5 ] && break
393 echo "current AT estimation is $REQ_DELAY, wait a little bit"
# Pause length: estimate + 25% + 5.
396 REQ_DELAY=$((${REQ_DELAY} + ${REQ_DELAY} / 4 + 5))
398 # sleep sometime in ctx handle
399 do_facet $SINGLEMDS lctl set_param fail_val=$REQ_DELAY
400 #define OBD_FAIL_SEC_CTX_HDL_PAUSE 0x1204
401 do_facet $SINGLEMDS lctl set_param fail_loc=0x1204
403 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
405 $RUNAS touch $DIR/d8/f &
407 echo "waiting for touch (pid $TOUCHPID) to finish..."
408 sleep 2 # give it a chance to really trigger context init rpc
# Clear the fail_loc so later requests are not paused.
409 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
410 wait $TOUCHPID || error "touch should have succeeded"
412 $LCTL dk | grep -i "Early reply #" || error "No early reply"
# Write the saved at_history value back; failure here is ignored.
415 do_facet $SINGLEMDS "echo $ATOLDBASE >> $ATHISTORY" || true
417 run_test 8 "Early reply sent for slow gss context negotiation"
420 # following tests will manipulate flavors and may end with any flavor set,
421 # so each test should not assume any start flavor.
# Test 90: switch every connection to krb5p, then flush the contexts $total
# times in a row.
# NOTE(review): the function header, the assignment of $total (the SLOW=no
# branch body is elided), the start/check/stop of the background load named
# in the test title, the sleep and the 'fi'/'done' lines are not visible in
# this elided listing.
425 if [ "$SLOW" = "no" ]; then
431 restore_to_default_flavor
432 set_rule $FSNAME any any krb5p
433 wait_flavor all2all krb5p
437 for ((n=0;n<$total;n++)); do
440 echo "flush ctx ($n/$total) ..."
441 $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
444 #sleep to let ctxs be re-established
448 run_test 90 "recoverable from losing contexts under load"
# Test 99: add sptlrpc rules until $max are present and remove them again,
# first as file-system-wide rules and then as rules on $FSNAME-MDT0000; the
# rule count read from the MGS must be the same before and after.
# Fixes compared to the previous revision:
#  - the second target-rule count no longer carries a stray '\ ' before the
#    pipe, which passed a literal space as an extra argument to the remote
#    'lctl get_param';
#  - a target-rule mismatch is now reported as "target rule" instead of
#    "general rule".
# NOTE(review): the function header, the assignment of $max and the
# 'done'/'fi' lines are not visible in this elided listing.
# Count the existing general rules in the MGS live log of the file system.
458 nrule_old=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
459 2>/dev/null | grep -c "$FSNAME.srpc.flavor.")
460 echo "original general rules: $nrule_old"
# Add one krb5n rule per synthetic network name ${NETTYPE}$i ...
462 for ((i = $nrule_old; i < $max; i++)); do
463 set_rule $FSNAME ${NETTYPE}$i any krb5n || error "set rule $i"
# ... and remove each again (set_rule without a flavor).
465 for ((i = $nrule_old; i < $max; i++)); do
466 set_rule $FSNAME ${NETTYPE}$i any || error "remove rule $i"
469 nrule_new=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
470 2>/dev/null | grep -c "$FSNAME.srpc.flavor.")
471 if [ $nrule_new != $nrule_old ]; then
472 error "general rule: $nrule_new != $nrule_old"
476 # target-specific rules
478 nrule_old=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
479 2>/dev/null | grep -c "$FSNAME-MDT0000.srpc.flavor.")
480 echo "original target rules: $nrule_old"
482 for ((i = $nrule_old; i < $max; i++)); do
483 set_rule $FSNAME-MDT0000 ${NETTYPE}$i any krb5i || error "set rule $i"
485 for ((i = $nrule_old; i < $max; i++)); do
486 set_rule $FSNAME-MDT0000 ${NETTYPE}$i any || error "remove rule $i"
489 nrule_new=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
490 2>/dev/null | grep -c "$FSNAME-MDT0000.srpc.flavor.")
491 if [ $nrule_new != $nrule_old ]; then
492 error "target rule: $nrule_new != $nrule_old"
495 run_test 99 "set large number of sptlrpc rules"
# Test 100: change security flavors step by step and wait for each change to
# take effect; every failed wait is reported through error_dbench with a
# step number.
# NOTE(review): the function header, the start/check/stop of the background
# dbench, and the right-hand sides of the two trailing '||' below are on
# lines not visible in this elided listing.
508 # started from default flavors
509 restore_to_default_flavor
511 # running dbench background
515 # all: null -> krb5n -> krb5a -> krb5i -> krb5p -> plain
517 set_rule $FSNAME any any krb5n
518 wait_flavor all2all krb5n || error_dbench "1"
521 set_rule $FSNAME any any krb5a
522 wait_flavor all2all krb5a || error_dbench "2"
525 set_rule $FSNAME any any krb5i
526 wait_flavor all2all krb5i || error_dbench "3"
529 set_rule $FSNAME any any krb5p
530 wait_flavor all2all krb5p || error_dbench "4"
533 set_rule $FSNAME any any plain
534 wait_flavor all2all plain || error_dbench "5"
# Direction-specific rules: a different flavor for each connection direction.
543 set_rule $FSNAME any mdt2mdt krb5a
544 wait_flavor mdt2mdt krb5a || error_dbench "6"
547 set_rule $FSNAME any cli2mdt krb5i
548 wait_flavor cli2mdt krb5i || error_dbench "7"
551 set_rule $FSNAME any mdt2ost krb5p
552 wait_flavor mdt2ost krb5p || error_dbench "8"
555 set_rule $FSNAME any cli2ost krb5n
556 wait_flavor cli2ost krb5n || error_dbench "9"
563 # nothing should be changed because they are override by above dir rules
565 set_rule $FSNAME-MDT0000 any any krb5p
566 set_rule $FSNAME-OST0000 any any krb5i
567 wait_flavor mdt2mdt krb5a || error_dbench "10"
568 wait_flavor cli2mdt krb5i || error_dbench "11"
570 wait_flavor mdt2ost krb5p || error_dbench "12"
571 wait_flavor cli2ost krb5n || error_dbench "13"
574 # delete all dir-specific rules
576 set_rule $FSNAME any mdt2mdt
577 set_rule $FSNAME any cli2mdt
578 set_rule $FSNAME any mdt2ost
579 set_rule $FSNAME any cli2ost
# With the direction rules gone, the target rules set above are expected to
# apply; the third argument is the number of connections to wait for.
580 wait_flavor mdt2mdt krb5p $((MDSCOUNT - 1)) || error_dbench "14"
581 wait_flavor cli2mdt krb5p $(get_clients_mount_count) ||
584 wait_flavor mdt2ost krb5i $MDSCOUNT || error_dbench "16"
585 wait_flavor cli2ost krb5i $(get_clients_mount_count) ||
# Remove the target-specific rules; everything should fall back to plain.
594 set_rule $FSNAME-MDT0000 any any
595 set_rule $FSNAME-OST0000 any any || error_dbench "18"
596 wait_flavor all2all plain || error_dbench "19"
601 run_test 100 "change security flavor on the fly under load"
# Switch the client-to-MDT flavor while a request is stuck waiting for a
# dropped reply, and check that the request completes after the switch.
# NOTE(review): the function header and the assignments of $flavor0,
# $flavor1 and $multiop_pid are on lines not visible in this elided listing;
# callers pass two flavor names, so these are presumably $1, $2 and $! -
# confirm.
607 local filename=$DIR/$tfile
612 # after set to flavor0, start multiop which use flavor0 rpc, and let
613 # server drop the reply; then switch to flavor1, the resend should be
614 # completed using flavor1. To exercise the code of switching ctx/sec
615 # for a resend request.
617 log ">>>>>>>>>>>>>>> Testing $flavor0 -> $flavor1 <<<<<<<<<<<<<<<<<<<"
619 set_rule $FSNAME any cli2mdt $flavor0
620 wait_flavor cli2mdt $flavor0
621 rm -f $filename || error "remove old $filename failed"
624 #define OBD_FAIL_PTLRPC_DROP_REQ_OPC 0x513
# fail_val selects the value the fail_loc acts on.
# NOTE(review): presumably 36 is the request opcode to drop - confirm.
625 do_facet $SINGLEMDS lctl set_param fail_val=36
626 do_facet $SINGLEMDS lctl set_param fail_loc=0x513
627 log "starting multiop"
# Background multiop with the 'm' command string on the test file.
628 $MULTIOP $filename m &
630 echo "multiop pid=$multiop_pid"
633 set_rule $FSNAME any cli2mdt $flavor1
634 wait_flavor cli2mdt $flavor1
# multiop must still be blocked at this point: exactly one ps line expected.
636 num=$(ps --no-headers -p $multiop_pid 2>/dev/null | wc -l)
637 [ $num -eq 1 ] || error "multiop($multiop_pid) already ended ($num)"
638 echo "process $multiop_pid is still hanging there... OK"
# Stop dropping so the resend can complete.
640 do_facet $SINGLEMDS lctl set_param fail_loc=0
641 log "waiting for multiop ($multiop_pid) to finish"
642 wait $multiop_pid || error "multiop returned error"
# Test 101: run switch_sec_test over a chain of flavor pairs; each call starts
# from the flavor the previous call ended on.
# NOTE(review): the function header and closing brace are not visible in this
# elided listing.
647 # started from default flavors
648 restore_to_default_flavor
650 switch_sec_test null plain
651 switch_sec_test plain krb5n
652 switch_sec_test krb5n krb5a
653 switch_sec_test krb5a krb5i
654 switch_sec_test krb5i krb5p
655 switch_sec_test krb5p null
656 switch_sec_test null krb5p
657 switch_sec_test krb5p krb5i
658 switch_sec_test krb5i plain
659 switch_sec_test plain krb5p
661 run_test 101 "switch ctx/sec for resending request"
# Test 102: issue many flavor changes back to back without waiting in
# between, then wait only for the final flavor to take effect.
# NOTE(review): the function header, the start/check/stop of the background
# dbench, the 15s sleeps with their re-checks and the loop's 'done' are on
# lines not visible in this elided listing.
674 # started from default flavors
675 restore_to_default_flavor
677 # run dbench background
680 echo "Testing null->krb5n->krb5a->krb5i->krb5p->plain->null"
681 set_rule $FSNAME any any krb5n
682 set_rule $FSNAME any any krb5a
683 set_rule $FSNAME any any krb5i
684 set_rule $FSNAME any any krb5p
685 set_rule $FSNAME any any plain
686 set_rule $FSNAME any any null
# Only the last rule in the burst is waited for.
689 wait_flavor all2all null || error_dbench "1"
692 echo "waiting for 15s and check again"
696 echo "Testing null->krb5i->null->krb5i->null..."
# Flip between krb5i and null ten times, then end on krb5i.
697 for ((i=0; i<10; i++)); do
698 set_rule $FSNAME any any krb5i
699 set_rule $FSNAME any any null
701 set_rule $FSNAME any any krb5i
704 wait_flavor all2all krb5i || error_dbench "2"
707 echo "waiting for 15s and check again"
713 run_test 102 "survive from insanely fast flavor switch"
# Test 150: the mgssec= mount option on clients. With the MGC connected to
# the MGS using the null flavor, a client mount asking for krb5p must fail,
# while mounts asking for null, or for nothing at all, must succeed.
# Returns 1 or 2 if one of the intermediate unmounts fails.
# The emptiness test on $clients is quoted so that a multi-word value cannot
# turn it into a '[' syntax error.
# NOTE(review): the function header and closing brace are not visible in this
# elided listing.
718 local clients=$CLIENTS
# Fall back to the local host when no client list is configured.
720 [ -z "$clients" ] && clients=$HOSTNAME
722 # started from default flavors
723 restore_to_default_flavor
725 # at this time no rules has been set on mgs; mgc use null
726 # flavor connect to mgs.
727 count=$(flvr_cnt_mgc2mgs null)
728 [ $count -eq 1 ] || error "$count mgc connection use null flavor"
730 zconf_umount_clients $clients $MOUNT || return 1
732 # mount client with conflict flavor - should fail
# Append mgssec to $MOUNT_OPTS, comma-separated only when it is non-empty.
733 mount_opts="${MOUNT_OPTS:+$MOUNT_OPTS,}mgssec=krb5p"
734 zconf_mount_clients $clients $MOUNT $mount_opts &&
735 error "mount with conflict flavor should have failed"
737 # mount client with same flavor - should succeed
738 mount_opts="${MOUNT_OPTS:+$MOUNT_OPTS,}mgssec=null"
739 zconf_mount_clients $clients $MOUNT $mount_opts ||
740 error "mount with same flavor should have succeeded"
741 zconf_umount_clients $clients $MOUNT || return 2
743 # mount client with default flavor - should succeed
744 zconf_mount_clients $clients $MOUNT ||
745 error "mount with default flavor should have succeeded"
747 run_test 150 "secure mgs connection: client flavor setting"
# Test 151: the MGS accepts only krb5p, so an MDT whose MGC connects with the
# default flavor or with mgssec=null must not reach FULL import state, while
# one mounted with mgssec=krb5p must.
# NOTE(review): the function header, the stopall/stop calls between the
# attempts, the 'else'/'fi' lines of the option handling and the final
# cleanup are not visible in this elided listing.
752 # set mgs only accept krb5p
753 set_rule _mgs any any krb5p
755 # umount everything, modules still loaded
758 # start gss daemon on mgs node
# Only needed when the MGS is not combined with the MDS.
759 combined_mgs_mds || start_gss_daemons $mgs_HOST "$LSVCGSSD -v"
762 start mgs $(mgsdevname 1) $MDS_MOUNT_OPTS
764 # mount mgs with default flavor, in current framework it means mgs+mdt1.
765 # the connection of mgc of mdt1 to mgs is expected fail.
766 DEVNAME=$(mdsdevname 1)
767 start mds1 $DEVNAME $MDS_MOUNT_OPTS
# Reaching FULL here is the error case.
768 wait_mgc_import_state mds FULL 0 &&
769 error "mount with default flavor should have failed"
772 # mount with unauthorized flavor should fail
# Temporarily extend $MDS_MOUNT_OPTS with mgssec=null; restored afterwards.
773 save_opts=$MDS_MOUNT_OPTS
774 if [ -z "$MDS_MOUNT_OPTS" ]; then
775 MDS_MOUNT_OPTS="-o mgssec=null"
777 MDS_MOUNT_OPTS="$MDS_MOUNT_OPTS,mgssec=null"
779 start mds1 $DEVNAME $MDS_MOUNT_OPTS
780 wait_mgc_import_state mds FULL 0 &&
781 error "mount with unauthorized flavor should have failed"
782 MDS_MOUNT_OPTS=$save_opts
785 # mount with designated flavor should succeed
# Same pattern with mgssec=krb5p, the flavor the MGS rule accepts.
786 save_opts=$MDS_MOUNT_OPTS
787 if [ -z "$MDS_MOUNT_OPTS" ]; then
788 MDS_MOUNT_OPTS="-o mgssec=krb5p"
790 MDS_MOUNT_OPTS="$MDS_MOUNT_OPTS,mgssec=krb5p"
792 start mds1 $DEVNAME $MDS_MOUNT_OPTS
793 wait_mgc_import_state mds FULL 0 ||
794 error "mount with designated flavor should have succeeded"
795 MDS_MOUNT_OPTS=$save_opts
799 run_test 151 "secure mgs connection: server flavor control"
# End of suite: hand over to the framework's cleanup helper.
# NOTE(review): any statements that precede or follow this call in the
# original file are not visible in this elided listing.
802 check_and_cleanup_lustre