3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
6 # e.g. ONLY="22 23" or ONLY="`seq 32 39`" or EXCEPT="31"
11 LUSTRE=${LUSTRE:-$(dirname $0)/..}
12 . $LUSTRE/tests/test-framework.sh
16 ALWAYS_EXCEPT="$SANITY_GSS_EXCEPT"
18 [ "$SLOW" = "no" ] && EXCEPT_SLOW="100 101"
22 require_dsh_mds || exit 0
24 # $RUNAS_ID may get set incorrectly somewhere else
25 [ $UID -eq 0 -a $RUNAS_ID -eq 0 ] &&
26 error "RUNAS_ID set to 0, but UID is also 0!"
28 # remove $SEC, we'd like to control everything by ourselves
32 # global variables of this sanity
# Announce the step and run krb5_login.sh under the $RUNAS command prefix.
# Exits the whole script with status 1 if the login helper fails.
40 prepare_krb5_creds() {
41 echo prepare krb5 cred
43 $RUNAS krb5_login.sh || exit 1
48 # we want double mount
49 MOUNT_2=${MOUNT_2:-"yes"}
50 check_and_setup_lustre
52 rm -rf $DIR/[df][0-9]*
54 check_runas_id $RUNAS_ID $RUNAS_ID $RUNAS
58 local NPROC=$(grep -c ^processor /proc/cpuinfo)
59 [ $NPROC -gt 2 ] && NPROC=2
60 bash rundbench -D $DIR/$tdir $NPROC 1>/dev/null &
64 num=$(ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l)
65 if [ $num -ne 1 ]; then
66 error "failed to start dbench $NPROC"
68 echo "started dbench with $NPROC processes at background"
# Inspect the background dbench job identified by $DBENCH_PID: when ps no
# longer lists it, reap it with wait and report a non-zero exit status;
# any count other than 0 or 1 for that single PID is treated as an error.
76 num=$(ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l)
77 if [ $num -eq 0 ]; then
78 echo "dbench $DBENCH_PID already finished"
# report the PID that was actually waited on ($PID is not assigned anywhere
# in the visible script, so it expanded to an empty string in the message)
79 wait $DBENCH_PID || error "dbench $DBENCH_PID exit with error"
81 elif [ $num -ne 1 ]; then
83 error "found $num instance of pid $DBENCH_PID ???"
92 killall dbench 2>/dev/null
93 local num=$(ps --no-headers -p $DBENCH_PID | wc -l)
94 if [ $num -eq 0 ]; then
95 echo "dbench finished"
98 echo "dbench $DBENCH_PID is still running, waiting 2s..."
102 wait $DBENCH_PID || true
# Log in again through krb5_login.sh under the $RUNAS command prefix; exits
# the script with status 1 if the login fails.
# NOTE(review): the statements that consume $keys are not visible in this
# extract — confirm what is done with the collected key IDs.
116 restore_krb5_cred() {
# first field of every `keyctl show` row whose sixth field starts with "lgssc:"
117 local keys=$(keyctl show | awk '$6 ~ "^lgssc:" {print $1}')
123 $RUNAS krb5_login.sh || exit 1
# Launch the daemon command $gssd on $facet ten times in a row, then raise an
# error unless the counted number of running instances is exactly one.
# NOTE(review): $facet and $gssd are presumably taken from the positional
# arguments (callers pass a facet and a daemon command) — confirm.
126 check_multiple_gss_daemons() {
# the process is looked up by bare command name, so strip any directory part
129 local gssd_name=$(basename $gssd)
# start the daemon repeatedly
131 for ((i = 0; i < 10; i++)); do
132 do_facet $facet "$gssd -vvv"
135 # wait for the daemons to enter "stable" status
# count instances by command name (the tail of this pipeline is not visible here)
138 local num=$(do_facet $facet ps -o cmd -C $gssd_name |
140 echo "$num instance(s) of $gssd_name are running"
142 if [ $num -ne 1 ]; then
143 error "$gssd_name not unique"
153 echo "bring up gss daemons..."
156 echo "check with someone already running..."
157 check_multiple_gss_daemons $my_facet $LSVCGSSD
159 check_multiple_gss_daemons $my_facet $LGSSD
162 echo "check with someone run & finished..."
163 do_facet $my_facet killall -q -2 lgssd $LSVCGSSD || true
164 sleep 5 # wait fully exit
165 check_multiple_gss_daemons $my_facet $LSVCGSSD
167 check_multiple_gss_daemons $my_facet $LGSSD
170 echo "check refresh..."
171 do_facet $my_facet killall -q -2 lgssd $LSVCGSSD || true
172 sleep 5 # wait fully exit
173 do_facet $my_facet ipcrm -S 0x3b92d473
174 check_multiple_gss_daemons $my_facet $LSVCGSSD
176 do_facet $my_facet ipcrm -S 0x3a92d473
177 check_multiple_gss_daemons $my_facet $LGSSD
180 run_test 0 "start multiple gss daemons"
185 local file=$DIR/$tdir/$tfile
187 mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
188 chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
189 $RUNAS ls -ld $DIR/$tdir
192 $RUNAS $LFS flushctx -k -r $MOUNT || error "can't flush context"
193 $RUNAS touch $file && error "unexpected success"
197 $RUNAS touch $file || error "should not fail"
198 [ -f $file ] || error "$file not found"
200 run_test 1 "access with or without krb5 credential"
203 local file1=$DIR/$tdir/$tfile-1
204 local file2=$DIR/$tdir/$tfile-2
206 mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
207 chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
209 # current access should be ok
210 $RUNAS touch $file1 || error "can't touch $file1"
211 [ -f $file1 ] || error "$file1 not found"
213 # cleanup all cred/ctx and touch
214 $RUNAS $LFS flushctx -k -r $MOUNT || error "can't flush context"
215 $RUNAS touch $file2 && error "unexpected success"
219 $RUNAS touch $file2 || error "should not fail"
220 [ -f $file2 ] || error "$file2 not found"
222 run_test 2 "lfs flushctx"
225 local file=$DIR/$tdir/$tfile
227 mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
228 chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
231 echo "aaaaaaaaaaaaaaaaa" > $file
233 $CHECKSTAT -p 0666 $file || error "$UID checkstat error"
234 $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat error"
235 $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat error"
238 $RUNAS $MULTIOP $file o_r &
240 # wait for multiop to finish its open()
243 # cleanup all cred/ctx and check
244 # metadata check should fail, but file data check should succeed
245 # because we always use root credential to OSTs
246 $RUNAS $LFS flushctx -k -r $MOUNT || error "can't flush context"
247 echo "destroyed credentials/contexs for $RUNAS_ID"
248 $RUNAS $CHECKSTAT -p 0666 $file && error "checkstat succeed"
250 wait $OPPID || error "read file data failed"
251 echo "read file data OK"
253 # restore and check again
255 echo "restored credentials for $RUNAS_ID"
256 $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat (2) error"
257 echo "$RUNAS_ID checkstat OK"
258 $CHECKSTAT -p 0666 $file || error "$UID checkstat (2) error"
259 echo "$UID checkstat OK"
260 $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat (2) error"
261 echo "$RUNAS_ID read file data OK"
263 run_test 3 "local cache under DLM lock"
266 local file1=$DIR/$tfile-1
267 local file2=$DIR/$tfile-2
269 ! $GSS_PIPEFS && skip "pipefs not used" && return
271 chmod 0777 $DIR || error "chmod $DIR failed"
272 # current access should be ok
273 $RUNAS touch $file1 || error "can't touch $file1"
274 [ -f $file1 ] || error "$file1 not found"
277 send_sigint client lgssd
279 check_gss_daemon_facet client lgssd && error "lgssd still running"
281 # flush context, and touch
282 $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
283 $RUNAS touch $file2 &
285 echo "waiting touch pid $TOUCHPID"
286 wait $TOUCHPID && error "touch should fail"
289 do_facet client "$LGSSD -v"
291 check_gss_daemon_facet client lgssd
293 # touch new should succeed
294 $RUNAS touch $file2 || error "can't touch $file2"
295 [ -f $file2 ] || error "$file2 not found"
297 run_test 4 "lgssd dead, operations should wait timeout and fail"
300 local file1=$DIR/$tdir/$tfile-1
301 local file2=$DIR/$tdir/$tfile-2
302 local wait_time=$((TIMEOUT + TIMEOUT / 2))
304 mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
305 chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
307 # current access should be ok
308 $RUNAS touch $file1 || error "can't touch $file1"
309 [ -f $file1 ] || error "$file1 not found"
312 send_sigint $(comma_list $(mdts_nodes)) $LSVCGSSD
314 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) $LSVCGSSD &&
315 error "$LSVCGSSD still running"
317 # flush context, and touch
318 $RUNAS $LFS flushctx -k -r $MOUNT || error "can't flush context (1)"
320 $RUNAS touch $file2 && error "should fail without $LSVCGSSD"
322 # restart lsvcgssd, expect touch succeed
323 echo "restart $LSVCGSSD and recovering"
324 start_gss_daemons $(comma_list $(mdts_nodes)) "$LSVCGSSD -vvv"
326 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) $LSVCGSSD
327 $RUNAS $LFS flushctx -k -r $MOUNT || error "can't flush context (2)"
329 $RUNAS touch $file2 || error "should not fail now"
330 [ -f $file2 ] || error "$file2 not found"
332 run_test 5 "lsvcgssd dead, operations fail"
337 mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
338 for ((i=0; i<$nfile; i++)); do
339 dd if=/dev/zero of=$DIR/$tdir/$tfile-$i bs=8k count=1 ||
340 error "dd $tfile-$i failed"
342 ls -l $DIR/$tdir/* > /dev/null || error "ls failed"
343 rm -rf $DIR2/$tdir/* || error "rm failed"
344 rmdir $DIR2/$tdir || error "rmdir failed"
346 run_test 6 "test basic DLM callback works"
351 # For open(), the client only reserves space for a default-stripe-count
352 # lovea, but the server may return a larger lovea in its reply (larger
353 # stripe count); the client must then call enlarge_reqbuf() and save the
354 # replied lovea in the request for a possible future replay.
356 # Note: this script does NOT guarantee that enlarge_reqbuf() will be in
357 # the path; however, it does work in a local test that has 2 OSTs and a
358 # default stripe count of 1.
359 [[ $OSTCOUNT -ge 2 ]] || skip_env "needs >= 2 OSTs"
361 mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
362 $LFS setstripe -c $OSTCOUNT $DIR/$tdir || error "setstripe -c $OSTCOUNT"
365 for ((i = 0; i < 20; i++)); do
366 dd if=/dev/zero of=$DIR/$tdir/f$i bs=4k count=16 2>/dev/null
369 for ((i = 0; i < 20; i++)); do
370 dd if=$DIR/$tdir/f$i of=/dev/null bs=4k count=16 2>/dev/null
373 run_test 7 "exercise enlarge_reqbuf()"
377 local atoldbase=$(do_facet $SINGLEMDS "$LCTL get_param -n at_history")
380 do_facet $SINGLEMDS "$LCTL set_param at_history=8" || true
382 "do_facet $SINGLEMDS $LCTL set_param at_history=$atoldbase" EXIT
389 stack_trap debugrestore EXIT
390 $LCTL set_param debug=+other
392 # wait for the AT estimation to come down, this is faster
394 req_delay=$($LCTL get_param -n \
395 mdc.${FSNAME}-MDT0000-mdc-*.timeouts |
396 awk '/portal 12/ {print $5}' | tail -1)
397 [ $req_delay -le 5 ] && break
398 echo "current AT estimation is $req_delay, wait a little bit"
401 req_delay=$((${req_delay} + ${req_delay} / 4 + 5))
403 # sleep for some time while handling the ctx
404 do_facet $SINGLEMDS $LCTL set_param fail_val=$req_delay
405 #define OBD_FAIL_SEC_CTX_HDL_PAUSE 0x1204
406 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1204
408 $RUNAS $LFS flushctx -k -r $MOUNT ||
409 error "can't flush context on $MOUNT"
412 $RUNAS touch $DIR/$tdir/$tfile &
414 echo "waiting for touch (pid $TOUCHPID) to finish..."
415 sleep 2 # give it a chance to really trigger context init rpc
416 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
417 wait $TOUCHPID || error "touch should have succeeded"
419 $LCTL dk | grep -i "Early reply #" || error "No early reply"
421 run_test 8 "Early reply sent for slow gss context negotiation"
424 # following tests will manipulate flavors and may end with any flavor set,
425 # so each test should not assume any start flavor.
429 if [ "$SLOW" = "no" ]; then
437 restore_to_default_flavor
442 for ((n = 1; n <= $total; n++)); do
445 echo "flush ctx ($n/$total) ..."
446 $LFS flushctx -k -r $MOUNT ||
447 error "can't flush context on $MOUNT"
450 #sleep to let ctxs be re-established
454 run_test 90 "recoverable from losing contexts under load"
464 nrule_old=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
465 2>/dev/null | grep -c "$FSNAME.srpc.flavor.")
466 echo "original general rules: $nrule_old"
468 for ((i = $nrule_old; i < $max; i++)); do
469 set_rule $FSNAME ${NETTYPE}$i cli2mdt krb5n ||
470 error "set rule $i (1)"
471 set_rule $FSNAME ${NETTYPE}$i cli2ost krb5n ||
472 error "set rule $i (2)"
473 set_rule $FSNAME ${NETTYPE}$i mdt2ost null ||
474 error "set rule $i (3)"
475 set_rule $FSNAME ${NETTYPE}$i mdt2mdt null ||
476 error "set rule $i (4)"
478 for ((i = $nrule_old; i < $max; i++)); do
479 set_rule $FSNAME ${NETTYPE}$i cli2mdt ||
480 error "remove rule $i (1)"
481 set_rule $FSNAME ${NETTYPE}$i cli2ost ||
482 error "remove rule $i (2)"
483 set_rule $FSNAME ${NETTYPE}$i mdt2ost ||
484 error "remove rule $i (3)"
485 set_rule $FSNAME ${NETTYPE}$i mdt2mdt ||
486 error "remove rule $i (4)"
490 nrule_new=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
491 2>/dev/null | grep -c "$FSNAME.srpc.flavor.")
492 if [ $nrule_new != $nrule_old ]; then
493 error "general rule: $nrule_new != $nrule_old"
497 # target-specific rules
499 nrule_old=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
500 2>/dev/null | grep -c "$FSNAME-MDT0000.srpc.flavor.")
501 echo "original target rules: $nrule_old"
503 for ((i = $nrule_old; i < $max; i++)); do
504 set_rule $FSNAME-MDT0000 ${NETTYPE}$i cli2mdt krb5i ||
505 error "set new rule $i (1)"
506 set_rule $FSNAME-MDT0000 ${NETTYPE}$i mdt2ost null ||
507 error "set new rule $i (2)"
508 set_rule $FSNAME-MDT0000 ${NETTYPE}$i mdt2mdt null ||
509 error "set new rule $i (3)"
511 for ((i = $nrule_old; i < $max; i++)); do
512 set_rule $FSNAME-MDT0000 ${NETTYPE}$i cli2mdt ||
513 error "remove new rule $i (1)"
514 set_rule $FSNAME-MDT0000 ${NETTYPE}$i mdt2ost ||
515 error "remove new rule $i (2)"
516 set_rule $FSNAME-MDT0000 ${NETTYPE}$i mdt2mdt ||
517 error "remove new rule $i (3)"
# Recount the MDT0000-specific sptlrpc flavor rules reported by the MGS.
# Uses a plain "|" like the other rule-count queries in this file; the former
# "\ |" handed an escaped-space word to do_facet as an extra argument.
520 nrule_new=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
521 2>/dev/null | grep -c "$FSNAME-MDT0000.srpc.flavor.")
# The rule count must return to its starting value once the added rules are
# removed. This comparison follows the target-specific ($FSNAME-MDT0000)
# recount, so the failure is labelled "target rule" rather than repeating the
# "general rule" text of the earlier check.
522 if [ $nrule_new != $nrule_old ]; then
523 error "target rule: $nrule_new != $nrule_old"
526 run_test 99 "set large number of sptlrpc rules"
529 # started from default flavors
530 restore_to_default_flavor
534 # running dbench in background
538 # all: null -> krb5n -> krb5a -> krb5i -> krb5p
556 # nothing should be changed because they are overridden by above rules
558 set_rule $FSNAME-MDT0000 any cli2mdt krb5a
559 set_rule $FSNAME-OST0000 any cli2ost krb5i
560 wait_flavor cli2mdt krb5p || error_dbench "1"
562 wait_flavor cli2ost krb5p || error_dbench "2"
569 set_rule $FSNAME-MDT0000 any cli2mdt
570 set_rule $FSNAME-OST0000 any cli2ost
576 set_rule $FSNAME any mdt2mdt
577 set_rule $FSNAME any cli2mdt
578 set_rule $FSNAME any mdt2ost
579 set_rule $FSNAME any cli2ost
580 restore_to_default_flavor
585 run_test 100 "change security flavor on the fly under load"
591 local filename=$DIR/$tfile
596 # after setting flavor0, start multiop which uses flavor0 rpc, and let
597 # server drop the reply; then switch to flavor1, the resend should be
598 # completed using flavor1. To exercise the code of switching ctx/sec
599 # for a resend request.
601 log ">>>>>>>>>>>>>>> Testing $flavor0 -> $flavor1 <<<<<<<<<<<<<<<<<<<"
603 set_rule $FSNAME any cli2mdt $flavor0
604 wait_flavor cli2mdt $flavor0
605 rm -f $filename || error "remove old $filename failed"
608 #define OBD_FAIL_PTLRPC_DROP_REQ_OPC 0x513
609 do_facet $SINGLEMDS lctl set_param fail_val=36
610 do_facet $SINGLEMDS lctl set_param fail_loc=0x513
611 log "starting multiop"
612 $MULTIOP $filename m &
614 echo "multiop pid=$multiop_pid"
617 set_rule $FSNAME any cli2mdt $flavor1
618 wait_flavor cli2mdt $flavor1
620 num=$(ps --no-headers -p $multiop_pid 2>/dev/null | wc -l)
621 [ $num -eq 1 ] || error "multiop($multiop_pid) already ended ($num)"
622 echo "process $multiop_pid is still hanging there... OK"
624 do_facet $SINGLEMDS lctl set_param fail_loc=0
625 log "waiting for multiop ($multiop_pid) to finish"
626 wait $multiop_pid || error "multiop returned error"
631 # started from default flavors
632 restore_to_default_flavor
634 switch_sec_test null krb5n
635 switch_sec_test krb5n krb5a
636 switch_sec_test krb5a krb5i
637 switch_sec_test krb5i krb5p
638 switch_sec_test krb5p null
640 run_test 101 "switch ctx/sec for resending request"
653 # started from default flavors
654 restore_to_default_flavor
658 # run dbench background
661 echo "Testing null->krb5n->krb5a->krb5i->krb5p->null"
670 echo "waiting for 15s and check again"
674 echo "Testing null->krb5i->null->krb5i->null..."
675 for ((idx = 0; idx < 5; idx++)); do
683 echo "waiting for 15s and check again"
689 run_test 102 "survive from fast flavor switch"
# Use the configured client list, falling back to the local host when it is
# empty. The expansion is quoted so the emptiness test is well-formed for any
# value (unquoted, an empty value collapsed to `[ -z ]` and a multi-word
# value produced a test syntax error).
694 local clients=$CLIENTS
696 [ -z "$clients" ] && clients=$HOSTNAME
698 # started from default flavors
699 restore_to_default_flavor
701 # at this time no rules have been set on mgs; mgc uses the null
702 # flavor to connect to mgs
703 count=$(flvr_cnt_mgc2mgs null)
704 [ $count -eq 1 ] || error "$count mgc connections use null flavor"
706 zconf_umount_clients $clients $MOUNT || error "umount failed (1)"
708 # mount client with conflicting flavor - should fail
709 mount_opts="${MOUNT_OPTS:+$MOUNT_OPTS,}mgssec=krb5p"
710 zconf_mount_clients $clients $MOUNT $mount_opts &&
711 error "mount with conflict flavor should have failed"
713 # mount client with same flavor - should succeed
714 mount_opts="${MOUNT_OPTS:+$MOUNT_OPTS,}mgssec=null"
715 zconf_mount_clients $clients $MOUNT $mount_opts ||
716 error "mount with same flavor should have succeeded"
717 zconf_umount_clients $clients $MOUNT || error "umount failed (2)"
719 # mount client with default flavor - should succeed
720 zconf_mount_clients $clients $MOUNT ||
721 error "mount with default flavor should have succeeded"
723 run_test 150 "secure mgs connection: client flavor setting"
727 set_rule _mgs any any
729 # umount everything, then remount
737 stack_trap exit_151 EXIT
739 # set mgs rule to only accept krb5p
740 set_rule _mgs any any krb5p
742 # umount everything, modules still loaded
745 # start gss daemon on mgs node
746 combined_mgs_mds || start_gss_daemons $mgs_HOST "$LSVCGSSD -vvv"
749 start mgs $(mgsdevname 1) $MDS_MOUNT_OPTS
751 # mount with default flavor, expected to fail
752 start ost1 "$(ostdevname 1)" $OST_MOUNT_OPTS
753 wait_mgc_import_state ost1 FULL 0 &&
754 error "mount with default flavor should have failed"
757 # mount with unauthorized flavor should fail
758 if [ -z "$OST_MOUNT_OPTS" ]; then
759 new_opts="-o mgssec=null"
761 new_opts="$OST_MOUNT_OPTS,mgssec=null"
763 start ost1 "$(ostdevname 1)" $new_opts
764 wait_mgc_import_state ost1 FULL 0 &&
765 error "mount with unauthorized flavor should have failed"
768 # mount with designated flavor should succeed
769 if [ -z "$OST_MOUNT_OPTS" ]; then
770 new_opts="-o mgssec=krb5p"
772 new_opts="$OST_MOUNT_OPTS,mgssec=krb5p"
774 start ost1 "$(ostdevname 1)" $new_opts
775 wait_mgc_import_state ost1 FULL 0 ||
776 error "mount with designated flavor should have succeeded"
780 run_test 151 "secure mgs connection: server flavor control"
785 check_and_cleanup_lustre