3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
6 # e.g. ONLY="22 23" or ONLY="`seq 32 39`" or EXCEPT="31"
11 LUSTRE=${LUSTRE:-$(dirname $0)/..}
12 . $LUSTRE/tests/test-framework.sh
16 ALWAYS_EXCEPT="$SANITY_GSS_EXCEPT"
18 [ "$SLOW" = "no" ] && EXCEPT_SLOW="100 101"
22 require_dsh_mds || exit 0
24 # $RUNAS_ID may get set incorrectly somewhere else
25 [ $UID -eq 0 -a $RUNAS_ID -eq 0 ] &&
26 error "RUNAS_ID set to 0, but UID is also 0!"
28 # remove $SEC, we'd like to control everything by ourselves
32 # global variables of this sanity test
35 KRB5_CRED=$KRB5_CCACHE_DIR/krb5cc_$RUNAS_ID
36 KRB5_CRED_SAVE=$KRB5_CCACHE_DIR/krb5cc.sanity.save
43 prepare_krb5_creds() {
44 echo prepare krb5 cred
47 $RUNAS krb5_login.sh || exit 1
48 [ -f $KRB5_CRED ] || exit 2
50 cp $KRB5_CRED $KRB5_CRED_SAVE
55 # we want double mount
56 MOUNT_2=${MOUNT_2:-"yes"}
57 check_and_setup_lustre
59 rm -rf $DIR/[df][0-9]*
61 check_runas_id $RUNAS_ID $RUNAS_ID $RUNAS
65 local NPROC=$(grep -c ^processor /proc/cpuinfo)
66 [ $NPROC -gt 2 ] && NPROC=2
67 bash rundbench $NPROC 1>/dev/null &
71 num=$(ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l)
72 if [ $num -ne 1 ]; then
73 error "failed to start dbench $NPROC"
75 echo "started dbench with $NPROC processes at background"
83 num=$(ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l)
84 if [ $num -eq 0 ]; then
85 echo "dbench $DBENCH_PID already finished"
86 wait $DBENCH_PID || error "dbench $DBENCH_PID exit with error" # report the pid actually waited on; $PID is never set
88 elif [ $num -ne 1 ]; then
90 error "found $num instance of pid $DBENCH_PID ???"
99 killall dbench 2>/dev/null
100 local num=$(ps --no-headers -p $DBENCH_PID | wc -l)
101 if [ $num -eq 0 ]; then
102 echo "dbench finished"
105 echo "dbench $DBENCH_PID is still running, waiting 2s..."
109 wait $DBENCH_PID || true
113 restore_krb5_cred() {
115 cp $KRB5_CRED_SAVE $KRB5_CRED
116 chown $RUNAS_ID:$RUNAS_ID $KRB5_CRED
117 chmod 0600 $KRB5_CRED
120 check_multiple_gss_daemons() {
123 local gssd_name=$(basename $gssd)
125 for ((i = 0; i < 10; i++)); do
126 do_facet $facet "$gssd -v"
129 # wait for the daemons to enter a "stable" state
132 local num=$(do_facet $facet ps -o cmd -C $gssd_name |
134 echo "$num instance(s) of $gssd_name are running"
136 if [ $num -ne 1 ]; then
137 error "$gssd_name not unique"
147 echo "bring up gss daemons..."
150 echo "check with someone already running..."
151 check_multiple_gss_daemons $my_facet $LSVCGSSD
153 check_multiple_gss_daemons $my_facet $LGSSD
156 echo "check with someone run & finished..."
157 do_facet $my_facet killall -q -2 lgssd lsvcgssd || true
158 sleep 5 # wait fully exit
159 check_multiple_gss_daemons $my_facet $LSVCGSSD
161 check_multiple_gss_daemons $my_facet $LGSSD
164 echo "check refresh..."
165 do_facet $my_facet killall -q -2 lgssd lsvcgssd || true
166 sleep 5 # wait fully exit
167 do_facet $my_facet ipcrm -S 0x3b92d473
168 check_multiple_gss_daemons $my_facet $LSVCGSSD
170 do_facet $my_facet ipcrm -S 0x3a92d473
171 check_multiple_gss_daemons $my_facet $LGSSD
174 run_test 0 "start multiple gss daemons"
179 local file=$DIR/$tfile
181 chmod 0777 $DIR || error "chmod $DIR failed"
185 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
186 $RUNAS touch $file && error "unexpected success"
190 $RUNAS touch $file || error "should not fail"
191 [ -f $file ] || error "$file not found"
193 run_test 1 "access with or without krb5 credential"
196 local file1=$DIR/$tfile-1
197 local file2=$DIR/$tfile-2
199 chmod 0777 $DIR || error "chmod $DIR failed"
200 # current access should be ok
201 $RUNAS touch $file1 || error "can't touch $file1"
202 [ -f $file1 ] || error "$file1 not found"
204 # cleanup all cred/ctx and touch
206 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
207 $RUNAS touch $file2 && error "unexpected success"
211 $RUNAS touch $file2 || error "should not fail"
212 [ -f $file2 ] || error "$file2 not found"
214 run_test 2 "lfs flushctx"
217 local file=$DIR/$tfile
220 echo "aaaaaaaaaaaaaaaaa" > $file
222 $CHECKSTAT -p 0666 $file || error "$UID checkstat error"
223 $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat error"
224 $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat error"
227 $RUNAS $MULTIOP $file o_r &
229 # wait for multiop to finish its open()
232 # clean up all creds/ctxs and check:
233 # the metadata check should fail, but the file data check should succeed
234 # because we always use the root credential to OSTs
236 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
237 echo "destroyed credentials/contexs for $RUNAS_ID"
238 $RUNAS $CHECKSTAT -p 0666 $file && error "checkstat succeed"
240 wait $OPPID || error "read file data failed"
241 echo "read file data OK"
243 # restore and check again
245 echo "restored credentials for $RUNAS_ID"
246 $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat (2) error"
247 echo "$RUNAS_ID checkstat OK"
248 $CHECKSTAT -p 0666 $file || error "$UID checkstat (2) error"
249 echo "$UID checkstat OK"
250 $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat (2) error"
251 echo "$RUNAS_ID read file data OK"
253 run_test 3 "local cache under DLM lock"
256 local file1=$DIR/$tfile-1
257 local file2=$DIR/$tfile-2
259 ! $GSS_PIPEFS && skip "pipefs not used" && return
261 chmod 0777 $DIR || error "chmod $DIR failed"
262 # current access should be ok
263 $RUNAS touch $file1 || error "can't touch $file1"
264 [ -f $file1 ] || error "$file1 not found"
267 send_sigint client lgssd
269 check_gss_daemon_facet client lgssd && error "lgssd still running"
271 # flush context, and touch
272 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
273 $RUNAS touch $file2 &
275 echo "waiting touch pid $TOUCHPID"
276 wait $TOUCHPID && error "touch should fail"
279 do_facet client "$LGSSD -v"
281 check_gss_daemon_facet client lgssd
283 # touch new should succeed
284 $RUNAS touch $file2 || error "can't touch $file2"
285 [ -f $file2 ] || error "$file2 not found"
287 run_test 4 "lgssd dead, operations should wait timeout and fail"
290 local file1=$DIR/$tfile-1
291 local file2=$DIR/$tfile-2
292 local wait_time=$((TIMEOUT + TIMEOUT / 2))
294 chmod 0777 $DIR || error "chmod $DIR failed"
295 # current access should be ok
296 $RUNAS touch $file1 || error "can't touch $file1"
297 [ -f $file1 ] || error "$file1 not found"
300 send_sigint $(comma_list $(mdts_nodes)) lsvcgssd
302 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) lsvcgssd &&
303 error "lsvcgssd still running"
305 # flush context, and touch
306 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
307 $RUNAS touch $file2 && error 'should fail without lsvcgssd'
309 # restart lsvcgssd, expect touch to succeed
310 echo "restart lsvcgssd and recovering"
311 start_gss_daemons $(comma_list $(mdts_nodes)) "$LSVCGSSD -v"
313 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) lsvcgssd
315 $RUNAS touch $file2 || error 'should not fail now'
316 [ -f $file2 ] || error "$file2 not found"
318 run_test 5 "lsvcgssd dead, operations fail"
323 mkdir $DIR/d6 || error "mkdir $DIR/d6 failed"
324 for ((i=0; i<$nfile; i++)); do
325 dd if=/dev/zero of=$DIR/d6/file$i bs=8k count=1 ||
326 error "dd file$i failed"
328 ls -l $DIR/d6/* > /dev/null || error "ls failed"
329 rm -rf $DIR2/d6/* || error "rm failed"
330 rmdir $DIR2/d6/ || error "rmdir failed"
332 run_test 6 "test basic DLM callback works"
338 # for open(), the client only reserves space for a default-stripe-count lovea,
339 # but the server may return a larger lovea in the reply (because of a larger
340 # stripe count); the client then needs to call enlarge_reqbuf() and save the
341 # replied lovea in the request for a possible future replay.
343 # Note: the current script does NOT guarantee that enlarge_reqbuf() will be
344 # in the path; however, it does work in a local test which has 2 OSTs and
345 # a default stripe count of 1.
346 num_osts=$($LFS getstripe $MOUNT | grep -Ec "^[0-9]*:.*ACTIVE") # grep -E: egrep is obsolescent and warns on newer GNU grep
347 echo "found $num_osts active OSTs"
348 [ $num_osts -lt 2 ] &&
349 echo "skipping $TESTNAME (must have >= 2 OSTs)" && return
351 mkdir $tdir || error "mkdir $tdir failed"
352 $LFS setstripe -c $num_osts $tdir || error "setstripe -c $num_osts"
355 for ((i = 0; i < 20; i++)); do
356 dd if=/dev/zero of=$tdir/f$i bs=4k count=16 2>/dev/null
359 for ((i = 0; i < 20; i++)); do
360 dd if=$tdir/f$i of=/dev/null bs=4k count=16 2>/dev/null
364 run_test 7 "exercise enlarge_reqbuf()"
368 local ATHISTORY=$(do_facet $SINGLEMDS "find /sys/ -name at_history")
369 local ATOLDBASE=$(do_facet $SINGLEMDS "cat $ATHISTORY")
371 do_facet $SINGLEMDS "echo 8 >> $ATHISTORY"
378 sysctl -w lnet.debug="+other"
380 # wait for the AT estimation to come down, this is faster
382 REQ_DELAY=$(lctl get_param -n \
383 mdc.${FSNAME}-MDT0000-mdc-*.timeouts |
384 awk '/portal 12/ {print $5}' | tail -1)
385 [ $REQ_DELAY -le 5 ] && break
386 echo "current AT estimation is $REQ_DELAY, wait a little bit"
389 REQ_DELAY=$((${REQ_DELAY} + ${REQ_DELAY} / 4 + 5))
391 # pause for a while during ctx handling
392 do_facet $SINGLEMDS lctl set_param fail_val=$REQ_DELAY
393 #define OBD_FAIL_SEC_CTX_HDL_PAUSE 0x1204
394 do_facet $SINGLEMDS lctl set_param fail_loc=0x1204
396 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
398 $RUNAS touch $DIR/d8/f &
400 echo "waiting for touch (pid $TOUCHPID) to finish..."
401 sleep 2 # give it a chance to really trigger context init rpc
402 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
403 wait $TOUCHPID || error "touch should have succeeded"
405 $LCTL dk | grep -i "Early reply #" || error "No early reply"
408 do_facet $SINGLEMDS "echo $ATOLDBASE >> $ATHISTORY" || true
410 run_test 8 "Early reply sent for slow gss context negotiation"
413 # following tests will manipulate flavors and may end with any flavor set,
414 # so each test should not assume any start flavor.
418 if [ "$SLOW" = "no" ]; then
424 restore_to_default_flavor
425 set_rule $FSNAME any any krb5p
426 wait_flavor all2all krb5p
430 for ((n=0;n<$total;n++)); do
433 echo "flush ctx ($n/$total) ..."
434 $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
437 #sleep to let ctxs be re-established
441 run_test 90 "recoverable from losing contexts under load"
451 nrule_old=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
452 2>/dev/null | grep -c "$FSNAME.srpc.flavor.")
453 echo "original general rules: $nrule_old"
455 for ((i = $nrule_old; i < $max; i++)); do
456 set_rule $FSNAME ${NETTYPE}$i any krb5n || error "set rule $i"
458 for ((i = $nrule_old; i < $max; i++)); do
459 set_rule $FSNAME ${NETTYPE}$i any || error "remove rule $i"
462 nrule_new=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
463 2>/dev/null | grep -c "$FSNAME.srpc.flavor.")
464 if [ $nrule_new != $nrule_old ]; then
465 error "general rule: $nrule_new != $nrule_old"
469 # target-specific rules
471 nrule_old=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
472 2>/dev/null | grep -c "$FSNAME-MDT0000.srpc.flavor.")
473 echo "original target rules: $nrule_old"
475 for ((i = $nrule_old; i < $max; i++)); do
476 set_rule $FSNAME-MDT0000 ${NETTYPE}$i any krb5i || error "set rule $i"
478 for ((i = $nrule_old; i < $max; i++)); do
479 set_rule $FSNAME-MDT0000 ${NETTYPE}$i any || error "remove rule $i"
482 nrule_new=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
483 2>/dev/null | grep -c "$FSNAME-MDT0000.srpc.flavor.")
484 if [ $nrule_new != $nrule_old ]; then # target-specific rule count must be back to its starting value
485 error "target rule: $nrule_new != $nrule_old"
488 run_test 99 "set large number of sptlrpc rules"
501 # started from default flavors
502 restore_to_default_flavor
504 # running dbench background
508 # all: null -> krb5n -> krb5a -> krb5i -> krb5p -> plain
510 set_rule $FSNAME any any krb5n
511 wait_flavor all2all krb5n || error_dbench "1"
514 set_rule $FSNAME any any krb5a
515 wait_flavor all2all krb5a || error_dbench "2"
518 set_rule $FSNAME any any krb5i
519 wait_flavor all2all krb5i || error_dbench "3"
522 set_rule $FSNAME any any krb5p
523 wait_flavor all2all krb5p || error_dbench "4"
526 set_rule $FSNAME any any plain
527 wait_flavor all2all plain || error_dbench "5"
536 set_rule $FSNAME any mdt2mdt krb5a
537 wait_flavor mdt2mdt krb5a || error_dbench "6"
540 set_rule $FSNAME any cli2mdt krb5i
541 wait_flavor cli2mdt krb5i || error_dbench "7"
544 set_rule $FSNAME any mdt2ost krb5p
545 wait_flavor mdt2ost krb5p || error_dbench "8"
548 set_rule $FSNAME any cli2ost krb5n
549 wait_flavor cli2ost krb5n || error_dbench "9"
556 # nothing should change because they are overridden by the dir rules above
558 set_rule $FSNAME-MDT0000 any any krb5p
559 set_rule $FSNAME-OST0000 any any krb5i
560 wait_flavor mdt2mdt krb5a || error_dbench "10"
561 wait_flavor cli2mdt krb5i || error_dbench "11"
563 wait_flavor mdt2ost krb5p || error_dbench "12"
564 wait_flavor cli2ost krb5n || error_dbench "13"
567 # delete all dir-specific rules
569 set_rule $FSNAME any mdt2mdt
570 set_rule $FSNAME any cli2mdt
571 set_rule $FSNAME any mdt2ost
572 set_rule $FSNAME any cli2ost
573 wait_flavor mdt2mdt krb5p $((MDSCOUNT - 1)) || error_dbench "14"
574 wait_flavor cli2mdt krb5p $(get_clients_mount_count) ||
577 wait_flavor mdt2ost krb5i $MDSCOUNT || error_dbench "16"
578 wait_flavor cli2ost krb5i $(get_clients_mount_count) ||
587 set_rule $FSNAME-MDT0000 any any
588 set_rule $FSNAME-OST0000 any any || error_dbench "18"
589 wait_flavor all2all plain || error_dbench "19"
594 run_test 100 "change security flavor on the fly under load"
600 local filename=$DIR/$tfile
605 # after setting flavor0, start multiop, which uses a flavor0 rpc, and let
606 # the server drop the reply; then switch to flavor1, and the resend should
607 # be completed using flavor1. This exercises the code that switches ctx/sec
608 # for a resent request.
610 log ">>>>>>>>>>>>>>> Testing $flavor0 -> $flavor1 <<<<<<<<<<<<<<<<<<<"
612 set_rule $FSNAME any cli2mdt $flavor0
613 wait_flavor cli2mdt $flavor0
614 rm -f $filename || error "remove old $filename failed"
617 #define OBD_FAIL_PTLRPC_DROP_REQ_OPC 0x513
618 do_facet $SINGLEMDS lctl set_param fail_val=36
619 do_facet $SINGLEMDS lctl set_param fail_loc=0x513
620 log "starting multiop"
621 $MULTIOP $filename m &
623 echo "multiop pid=$multiop_pid"
626 set_rule $FSNAME any cli2mdt $flavor1
627 wait_flavor cli2mdt $flavor1
629 num=$(ps --no-headers -p $multiop_pid 2>/dev/null | wc -l)
630 [ $num -eq 1 ] || error "multiop($multiop_pid) already ended ($num)"
631 echo "process $multiop_pid is still hanging there... OK"
633 do_facet $SINGLEMDS lctl set_param fail_loc=0
634 log "waiting for multiop ($multiop_pid) to finish"
635 wait $multiop_pid || error "multiop returned error"
640 # started from default flavors
641 restore_to_default_flavor
643 switch_sec_test null plain
644 switch_sec_test plain krb5n
645 switch_sec_test krb5n krb5a
646 switch_sec_test krb5a krb5i
647 switch_sec_test krb5i krb5p
648 switch_sec_test krb5p null
649 switch_sec_test null krb5p
650 switch_sec_test krb5p krb5i
651 switch_sec_test krb5i plain
652 switch_sec_test plain krb5p
654 run_test 101 "switch ctx/sec for resending request"
667 # started from default flavors
668 restore_to_default_flavor
670 # run dbench background
673 echo "Testing null->krb5n->krb5a->krb5i->krb5p->plain->null"
674 set_rule $FSNAME any any krb5n
675 set_rule $FSNAME any any krb5a
676 set_rule $FSNAME any any krb5i
677 set_rule $FSNAME any any krb5p
678 set_rule $FSNAME any any plain
679 set_rule $FSNAME any any null
682 wait_flavor all2all null || error_dbench "1"
685 echo "waiting for 15s and check again"
689 echo "Testing null->krb5i->null->krb5i->null..."
690 for ((i=0; i<10; i++)); do
691 set_rule $FSNAME any any krb5i
692 set_rule $FSNAME any any null
694 set_rule $FSNAME any any krb5i
697 wait_flavor all2all krb5i || error_dbench "2"
700 echo "waiting for 15s and check again"
706 run_test 102 "survive from insanely fast flavor switch"
711 local clients=$CLIENTS
713 [ -z "$clients" ] && clients=$HOSTNAME # quoted so the test stays well-formed for empty or multi-word values
715 # started from default flavors
716 restore_to_default_flavor
718 # at this time no rules have been set on the mgs; the mgc uses the null
719 # flavor to connect to the mgs.
720 count=$(flvr_cnt_mgc2mgs null)
721 [ $count -eq 1 ] || error "$count mgc connection use null flavor"
723 zconf_umount_clients $clients $MOUNT || return 1
725 # mount client with conflict flavor - should fail
726 mount_opts="${MOUNT_OPTS:+$MOUNT_OPTS,}mgssec=krb5p"
727 zconf_mount_clients $clients $MOUNT $mount_opts &&
728 error "mount with conflict flavor should have failed"
730 # mount client with same flavor - should succeed
731 mount_opts="${MOUNT_OPTS:+$MOUNT_OPTS,}mgssec=null"
732 zconf_mount_clients $clients $MOUNT $mount_opts ||
733 error "mount with same flavor should have succeeded"
734 zconf_umount_clients $clients $MOUNT || return 2
736 # mount client with default flavor - should succeed
737 zconf_mount_clients $clients $MOUNT ||
738 error "mount with default flavor should have succeeded"
740 run_test 150 "secure mgs connection: client flavor setting"
745 # set mgs only accept krb5p
746 set_rule _mgs any any krb5p
748 # umount everything, modules still loaded
751 # start gss daemon on mgs node
752 combined_mgs_mds || start_gss_daemons $mgs_HOST "$LSVCGSSD -v"
755 start mgs $(mgsdevname 1) $MDS_MOUNT_OPTS
757 # mount mgs with default flavor; in the current framework it means mgs+mdt1.
758 # the connection of the mgc of mdt1 to the mgs is expected to fail.
759 DEVNAME=$(mdsdevname 1)
760 start mds1 $DEVNAME $MDS_MOUNT_OPTS
761 wait_mgc_import_state mds FULL 0 &&
762 error "mount with default flavor should have failed"
765 # mount with unauthorized flavor should fail
766 save_opts=$MDS_MOUNT_OPTS
767 if [ -z "$MDS_MOUNT_OPTS" ]; then
768 MDS_MOUNT_OPTS="-o mgssec=null"
770 MDS_MOUNT_OPTS="$MDS_MOUNT_OPTS,mgssec=null"
772 start mds1 $DEVNAME $MDS_MOUNT_OPTS
773 wait_mgc_import_state mds FULL 0 &&
774 error "mount with unauthorized flavor should have failed"
775 MDS_MOUNT_OPTS=$save_opts
778 # mount with designated flavor should succeed
779 save_opts=$MDS_MOUNT_OPTS
780 if [ -z "$MDS_MOUNT_OPTS" ]; then
781 MDS_MOUNT_OPTS="-o mgssec=krb5p"
783 MDS_MOUNT_OPTS="$MDS_MOUNT_OPTS,mgssec=krb5p"
785 start mds1 $DEVNAME $MDS_MOUNT_OPTS
786 wait_mgc_import_state mds FULL 0 ||
787 error "mount with designated flavor should have succeeded"
788 MDS_MOUNT_OPTS=$save_opts
792 run_test 151 "secure mgs connection: server flavor control"
795 check_and_cleanup_lustre