lustre/tests/sanity-krb5.sh

   1 #!/bin/bash
   2 #
   3 # Run select tests by setting ONLY, or as arguments to the script.
   4 # Skip specific tests by setting EXCEPT.
   5 #
   6 # e.g. ONLY="22 23" or ONLY="`seq 32 39`" or EXCEPT="31"
   7 set -e
   8
   9 ONLY=${ONLY:-"$*"}
  10
  11 LUSTRE=${LUSTRE:-$(dirname $0)/..}
  12 . $LUSTRE/tests/test-framework.sh
  13 init_test_env $@
  14 init_logging
  15
  16 ALWAYS_EXCEPT="$SANITY_GSS_EXCEPT"
  17
  18 [ "$SLOW" = "no" ] && EXCEPT_SLOW="100 101"
  19
  20 build_test_filter
  21
  22 require_dsh_mds || exit 0
  23
  24 # $RUNAS_ID may get set incorrectly somewhere else
  25 [ $UID -eq 0 -a $RUNAS_ID -eq 0 ] &&
  26     error "RUNAS_ID set to 0, but UID is also 0!"
  27
  28 # remove $SEC, we'd like to control everything by ourselves
  29 unset SEC
  30
  31 #
  32 # global variables of this sanity
  33 #
  34 DBENCH_PID=0
  35
  36 # set manually
  37 GSS=true
  38 GSS_KRB5=true
  39
  40 # Overwrite RUNAS command to use su - instead,
  41 # to initialize the process more completely.
  42 # This is required to get proper access to keyrings.
  43 RUNAS="runas_su $(id -n -u $RUNAS_ID)"
  44
  45 check_krb_env() {
  46         which klist || skip "Kerberos env not setup"
  47         which kinit || skip "Kerberos env not setup"
  48 }
  49
  50 prepare_krb5_creds() {
  51         echo prepare krb5 cred
  52         echo RUNAS=$RUNAS
  53         $RUNAS krb5_login.sh || exit 1
  54 }
  55
  56 check_krb_env
  57 prepare_krb5_creds
  58
  59 # we want double mount
  60 MOUNT_2=${MOUNT_2:-"yes"}
  61 check_and_setup_lustre
  62
  63 rm -rf $DIR/[df][0-9]*
  64
  65 check_runas_id $RUNAS_ID $RUNAS_ID $RUNAS
  66
  67 start_dbench()
  68 {
  69         local NPROC=$(grep -c ^processor /proc/cpuinfo)
  70         [ $NPROC -gt 2 ] && NPROC=2
  71         bash rundbench -D $DIR/$tdir $NPROC 1>/dev/null &
  72         DBENCH_PID=$!
  73         sleep 2
  74
  75         num=$(ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l)
  76         if [ $num -ne 1 ]; then
  77                 error "failed to start dbench $NPROC"
  78         else
  79                 echo "started dbench with $NPROC processes at background"
  80         fi
  81
  82         return 0
  83 }
  84
  85 check_dbench()
  86 {
  87         num=$(ps --no-headers -p $DBENCH_PID 2>/dev/null | wc -l)
  88         if [ $num -eq 0 ]; then
  89                 echo "dbench $DBENCH_PID already finished"
  90                 wait $DBENCH_PID || error "dbench $PID exit with error"
  91                 start_dbench
  92         elif [ $num -ne 1 ]; then
  93                 killall -9 dbench
  94                 error "found $num instance of pid $DBENCH_PID ???"
  95         fi
  96
  97         return 0
  98 }
  99
 100 stop_dbench()
 101 {
 102         for ((;;)); do
 103                 killall dbench 2>/dev/null
 104                 local num=$(ps --no-headers -p $DBENCH_PID | wc -l)
 105                 if [ $num -eq 0 ]; then
 106                         echo "dbench finished"
 107                         break
 108                 fi
 109                 echo "dbench $DBENCH_PID is still running, waiting 2s..."
 110                 sleep 2
 111         done
 112
 113         wait $DBENCH_PID || true
 114         sync || true
 115 }
 116
 117 error_dbench()
 118 {
 119         local err_str=$1
 120
 121         killall -9 dbench
 122         sleep 1
 123
 124         error $err_str
 125 }
 126
 127 restore_krb5_cred() {
 128         local keys=$(keyctl show | awk '$6 ~ "^lgssc:" {print $1}')
 129
 130         for key in $keys; do
 131                 keyctl unlink $key
 132         done
 133         echo RUNAS=$RUNAS
 134         $RUNAS krb5_login.sh || exit 1
 135 }
 136
 137 check_multiple_gss_daemons() {
 138         local facet=$1
 139         local gssd=$2
 140         local gssd_name=$(basename $gssd)
 141
 142         for ((i = 0; i < 10; i++)); do
 143                 do_facet $facet "$gssd -vvv"
 144         done
 145
 146         # wait daemons entering "stable" status
 147         sleep 5
 148
 149         local num=$(do_facet $facet ps -o cmd -C $gssd_name |
 150                 grep -c $gssd_name)
 151         echo "$num instance(s) of $gssd_name are running"
 152
 153         if [ $num -ne 1 ]; then
 154                 error "$gssd_name not unique"
 155         fi
 156 }
 157
 158 calc_connection_cnt
 159 umask 077
 160
 161 test_0() {
 162         local my_facet=mds
 163
 164         echo "bring up gss daemons..."
 165         start_gss_daemons
 166
 167         echo "check with someone already running..."
 168         check_multiple_gss_daemons $my_facet $LSVCGSSD
 169
 170         echo "check with someone run & finished..."
 171         do_facet $my_facet killall -q -2 lgssd $LSVCGSSD || true
 172         sleep 5 # wait fully exit
 173         check_multiple_gss_daemons $my_facet $LSVCGSSD
 174
 175         echo "check refresh..."
 176         do_facet $my_facet killall -q -2 lgssd $LSVCGSSD || true
 177         sleep 5 # wait fully exit
 178         do_facet $my_facet ipcrm -S 0x3b92d473
 179         check_multiple_gss_daemons $my_facet $LSVCGSSD
 180 }
 181 run_test 0 "start multiple gss daemons"
 182
 183 set_flavor_all krb5p
 184
 185 test_1a() {
 186         local file=$DIR/$tdir/$tfile
 187
 188         mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
 189         chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
 190         $RUNAS ls -ld $DIR/$tdir
 191
 192         # access w/o cred
 193         $RUNAS $LFS flushctx -k -r $MOUNT || error "can't flush context"
 194         $RUNAS touch $file && error "unexpected success"
 195
 196         # access w/ cred
 197         restore_krb5_cred
 198         $RUNAS touch $file || error "should not fail"
 199         [ -f $file ] || error "$file not found"
 200 }
 201 run_test 1a "access with or without krb5 credential"
 202
 203 test_1b() {
 204         local file=$DIR/$tdir/$tfile
 205         local lgssconf=/etc/request-key.d/lgssc.conf
 206         local clients=$CLIENTS
 207         local realm
 208
 209         [ -z $clients ] && clients=$HOSTNAME
 210         zconf_umount_clients $clients $MOUNT || error "umount clients failed"
 211
 212         echo "stop gss daemons..."
 213         stop_gss_daemons
 214
 215         # get local realm from krb5.conf, assume the same for all nodes
 216         realm=$(grep default_realm /etc/krb5.conf | awk '{print $3}')
 217
 218         # add -R option to lgss_keyring on local client
 219         cp $lgssconf $TMP/lgssc.conf
 220         stack_trap "yes | cp $TMP/lgssc.conf $lgssconf" EXIT
 221         sed -i s+lgss_keyring+\&\ \-R\ $realm+ $lgssconf
 222
 223         # add -R option to lsvcgssd
 224         echo "bring up gss daemons..."
 225         start_gss_daemons '' '' "-R $realm"
 226         stack_trap "stop_gss_daemons ; start_gss_daemons" EXIT
 227
 228         zconf_mount_clients $clients $MOUNT || error "mount clients failed"
 229
 230         mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
 231         chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
 232         $RUNAS touch $file || error "touch $file failed"
 233         [ -f $file ] || error "$file not found"
 234 }
 235 run_test 1b "Use specified realm"
 236
 237 test_2() {
 238         local file1=$DIR/$tdir/$tfile-1
 239         local file2=$DIR/$tdir/$tfile-2
 240
 241         mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
 242         chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
 243
 244         # current access should be ok
 245         $RUNAS touch $file1 || error "can't touch $file1"
 246         [ -f $file1 ] || error "$file1 not found"
 247
 248         # cleanup all cred/ctx and touch
 249         $RUNAS $LFS flushctx -k -r $MOUNT || error "can't flush context"
 250         $RUNAS touch $file2 && error "unexpected success"
 251
 252         # restore and touch
 253         restore_krb5_cred
 254         $RUNAS touch $file2 || error "should not fail"
 255         [ -f $file2 ] || error "$file2 not found"
 256 }
 257 run_test 2 "lfs flushctx"
 258
 259 test_3() {
 260         local file=$DIR/$tdir/$tfile
 261
 262         mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
 263         chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
 264
 265         # create file
 266         echo "aaaaaaaaaaaaaaaaa" > $file
 267         chmod 0666 $file
 268         $CHECKSTAT -p 0666 $file || error "$UID checkstat error"
 269         $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat error"
 270         $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat error"
 271
 272         # start multiop
 273         $RUNAS $MULTIOP $file o_r &
 274         OPPID=$!
 275         # wait multiop finish its open()
 276         sleep 1
 277
 278         # cleanup all cred/ctx and check
 279         # metadata check should fail, but file data check should succeed
 280         # because we always use root credential to OSTs
 281         $RUNAS $LFS flushctx -k -r $MOUNT || error "can't flush context"
 282         echo "destroyed credentials/contexs for $RUNAS_ID"
 283         $RUNAS $CHECKSTAT -p 0666 $file && error "checkstat succeed"
 284         kill -s 10 $(pgrep -u $USER0 $MULTIOP)
 285         wait $OPPID || error "read file data failed"
 286         echo "read file data OK"
 287
 288         # restore and check again
 289         restore_krb5_cred
 290         echo "restored credentials for $RUNAS_ID"
 291         $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat (2) error"
 292         echo "$RUNAS_ID checkstat OK"
 293         $CHECKSTAT -p 0666 $file || error "$UID checkstat (2) error"
 294         echo "$UID checkstat OK"
 295         $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat (2) error"
 296         echo "$RUNAS_ID read file data OK"
 297 }
 298 run_test 3 "local cache under DLM lock"
 299
 300 test_5() {
 301         local file1=$DIR/$tdir/$tfile-1
 302         local file2=$DIR/$tdir/$tfile-2
 303         local file3=$DIR/$tdir/$tfile-3
 304         local wait_time=$((TIMEOUT + TIMEOUT / 2))
 305         local mdts=$(mdts_nodes)
 306
 307         mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
 308         chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
 309
 310         # current access should be ok
 311         $RUNAS touch $file1 || error "can't touch $file1"
 312         [ -f $file1 ] || error "$file1 not found"
 313
 314         # flush context
 315         $RUNAS $LFS flushctx $MOUNT || error "can't flush context (1)"
 316
 317         # stop lsvcgssd
 318         send_sigint $mdts $LSVCGSSD
 319         sleep 5
 320         check_gss_daemon_nodes $mdts $LSVCGSSD &&
 321                 error "$LSVCGSSD still running (1)"
 322
 323         # daemon should restart automatically, at least on newer servers
 324         $RUNAS touch $file2
 325         if [ $? -ne 0 ]; then
 326                 echo "$RUNAS touch $file2 failed"
 327                 (( MDS1_VERSION < $(version_code 2.15.61) )) ||
 328                         error "$LSVCGSSD should restart automatically"
 329         else
 330                 echo "$RUNAS touch $file2 succeeded"
 331         fi
 332
 333         # flush context
 334         if (( MDS1_VERSION >= $(version_code 2.15.61) )); then
 335                 $RUNAS $LFS flushctx $MOUNT || error "can't flush context (2)"
 336         fi
 337
 338         # stop lsvcgssd
 339         send_sigint $mdts $LSVCGSSD
 340         sleep 5
 341         check_gss_daemon_nodes $mdts $LSVCGSSD &&
 342                 error "$LSVCGSSD still running (2)"
 343
 344         # restart lsvcgssd, expect touch succeed
 345         echo "restart $LSVCGSSD and recovering"
 346         start_gss_daemons $mdts $LSVCGSSD "-vvv"
 347         sleep 5
 348         check_gss_daemon_nodes $mdts $LSVCGSSD
 349         $RUNAS touch $file3 || error "should not fail now"
 350         [ -f $file3 ] || error "$file3 not found"
 351 }
 352 run_test 5 "lsvcgssd dead, operations pass"
 353
 354 test_6() {
 355         local nfile=10
 356
 357         mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
 358         for ((i=0; i<$nfile; i++)); do
 359                 dd if=/dev/zero of=$DIR/$tdir/$tfile-$i bs=8k count=1 ||
 360                     error "dd $tfile-$i failed"
 361         done
 362         ls -l $DIR/$tdir/* > /dev/null || error "ls failed"
 363         rm -rf $DIR2/$tdir/* || error "rm failed"
 364         rmdir $DIR2/$tdir || error "rmdir failed"
 365 }
 366 run_test 6 "test basic DLM callback works"
 367
 368 test_7() {
 369         local num_osts
 370
 371         # for open(), client only reserve space for default stripe count lovea,
 372         # and server may return larger lovea in reply (because of larger stripe
 373         # count), client need call enlarge_reqbuf() and save the replied lovea
 374         # in request for future possible replay.
 375         #
 376         # Note: current script does NOT guarantee enlarge_reqbuf() will be in
 377         # the path, however it does work in local test which has 2 OSTs and
 378         # default stripe count is 1.
 379         [[ $OSTCOUNT -ge 2 ]] || skip_env "needs >= 2 OSTs"
 380
 381         mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
 382         $LFS setstripe -c $OSTCOUNT $DIR/$tdir || error "setstripe -c $OSTCOUNT"
 383
 384         echo "creating..."
 385         for ((i = 0; i < 20; i++)); do
 386                 dd if=/dev/zero of=$DIR/$tdir/f$i bs=4k count=16 2>/dev/null
 387         done
 388         echo "reading..."
 389         for ((i = 0; i < 20; i++)); do
 390                 dd if=$DIR/$tdir/f$i of=/dev/null bs=4k count=16 2>/dev/null
 391         done
 392 }
 393 run_test 7 "exercise enlarge_reqbuf()"
 394
 395 test_8()
 396 {
 397         local atoldbase=$(do_facet $SINGLEMDS "$LCTL get_param -n at_history")
 398         local req_delay
 399
 400         do_facet $SINGLEMDS "$LCTL set_param at_history=8" || true
 401         stack_trap \
 402                 "do_facet $SINGLEMDS $LCTL set_param at_history=$atoldbase" EXIT
 403
 404         mkdir_on_mdt0 $DIR/$tdir
 405         chmod a+w $DIR/$tdir
 406
 407         $RUNAS ls $DIR/$tdir
 408         $RUNAS keyctl show @u
 409         echo Flushing gss ctxs
 410         $RUNAS $LFS flushctx $MOUNT || error "can't flush context on $MOUNT"
 411         $RUNAS keyctl show @u
 412
 413         $LCTL dk > /dev/null
 414         debugsave
 415         stack_trap debugrestore EXIT
 416         $LCTL set_param debug=+other
 417
 418         # wait for the at estimation come down, this is faster
 419         while [ true ]; do
 420                 req_delay=$($LCTL get_param -n \
 421                         mdc.${FSNAME}-MDT0000-mdc-*.timeouts |
 422                         awk '/portal 12/ {print $5}' | tail -1)
 423                 [ $req_delay -le 5 ] && break
 424                 echo "current AT estimation is $req_delay, wait a little bit"
 425                 sleep 8
 426         done
 427         req_delay=$((${req_delay} + ${req_delay} / 4 + 5))
 428
 429         # sleep sometime in ctx handle
 430         do_facet $SINGLEMDS $LCTL set_param fail_val=$req_delay
 431         #define OBD_FAIL_SEC_CTX_HDL_PAUSE       0x00001204
 432         #define CFS_FAIL_ONCE                    0x80000000
 433         do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001204
 434
 435         $RUNAS touch $DIR/$tdir/$tfile &
 436         TOUCHPID=$!
 437         echo "waiting for touch (pid $TOUCHPID) to finish..."
 438         wait $TOUCHPID || error "touch should have succeeded"
 439         $RUNAS keyctl show @u
 440
 441         $LCTL dk | grep -i "Early reply #" || error "No early reply"
 442 }
 443 run_test 8 "Early reply sent for slow gss context negotiation"
 444
 445 test_9() {
 446         local test9user=$(getent passwd $RUNAS_ID | cut -d: -f1)
 447
 448         $LFS mkdir -i 0 -c 1 $DIR/$tdir || error "mkdir $DIR/$tdir failed"
 449         chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
 450         $RUNAS ls -ld $DIR/$tdir
 451
 452         # Add group, and client to new group, on client only.
 453         # Server is not aware.
 454         groupadd -g 5000 grptest9
 455         stack_trap "groupdel grptest9" EXIT
 456
 457         usermod -g grptest9 $test9user
 458         stack_trap "usermod -g $test9user $test9user" EXIT
 459         id $RUNAS_ID
 460         # Thanks to Kerberos, client should not be able to create file
 461         # with primary group not known on server side
 462         $RUNAS touch $DIR/$tdir/fileA &&
 463                 error "server should not trust client's primary gid"
 464         do_facet mds1 "lctl set_param mdt.*.identity_flush=-1"
 465
 466         do_facet mds1 groupadd -g 5000 grptest9
 467         stack_trap "do_facet mds1 groupdel grptest9 || true" EXIT
 468         do_facet mds1 usermod -a -G grptest9 $test9user
 469         stack_trap "do_facet mds1 gpasswd -d $test9user grptest9 || true" EXIT
 470         id $RUNAS_ID
 471         do_facet mds1 "id $RUNAS_ID"
 472         # Thanks to Kerberos, client should be able to create file
 473         # with primary group taken as one of supp groups, as long as
 474         # server side knows the supp groups.
 475         $RUNAS touch $DIR/$tdir/fileA ||
 476                 error "server should know client's supp gid"
 477         ls -l $DIR/$tdir
 478         do_facet mds1 "lctl set_param mdt.*.identity_flush=-1"
 479         do_facet mds1 gpasswd -d $test9user grptest9
 480         do_facet mds1 groupdel grptest9
 481         usermod -g $test9user $test9user
 482
 483         usermod -a -G grptest9 $test9user
 484         stack_trap "gpasswd -d $test9user grptest9" EXIT
 485         id $RUNAS_ID
 486         $RUNAS touch $DIR/$tdir/fileB
 487         ls -l $DIR/$tdir
 488         # Thanks to Kerberos, client should not be able to chgrp
 489         $RUNAS chgrp grptest9 $DIR/$tdir/fileB &&
 490                 error "server should not trust client's supp gid"
 491         ls -l $DIR/$tdir
 492         do_facet mds1 "lctl set_param mdt.*.identity_flush=-1"
 493 }
 494 run_test 9 "Do not trust primary and supp gids from client"
 495
 496 test_10() {
 497         local count
 498
 499         $LFS mkdir -i 0 -c $MDSCOUNT $DIR/$tdir ||
 500                 error "mkdir $DIR/$tdir failed"
 501         chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
 502         $RUNAS ls -ld $DIR/$tdir || error "ls -ld $DIR/$tdir failed"
 503         $RUNAS grep lgssc /proc/keys
 504
 505         # get rid of gss context and credentials for user
 506         $RUNAS $LFS flushctx -k -r $MOUNT || error "can't flush context (1)"
 507         $RUNAS grep lgssc /proc/keys
 508         stack_trap restore_krb5_cred EXIT
 509
 510         # restore krb credentials
 511         restore_krb5_cred
 512
 513         # revoke session keyring for user and access to fs in the same su -
 514         su - $(id -n -u $RUNAS_ID) -c "keyctl revoke @s && ls -ld $DIR/$tdir" ||
 515                 error "revoke + ls failed"
 516         $RUNAS grep lgssc /proc/keys
 517
 518         # refcount on lgssc keys should be 2
 519         for ref in $($RUNAS grep lgssc /proc/keys | awk '$4~"perm"{print $3}');\
 520           do
 521                 [[ $ref == 2 ]] || error "bad refcnt $ref on key"
 522         done
 523
 524         # get rid of gss context for user
 525         $RUNAS $LFS flushctx $MOUNT || error "can't flush context (2)"
 526         $RUNAS grep lgssc /proc/keys
 527         count=$($RUNAS grep lgssc /proc/keys | grep -v "Running as" | wc -l)
 528         [[ $count == 0 ]] || error "remaining $count keys for user"
 529 }
 530 run_test 10 "Support revoked session keyring"
 531
 532 exit_11() {
 533         zconf_umount $HOSTNAME $MOUNT
 534
 535         zconf_mount $HOSTNAME $MOUNT
 536         if [ "$MOUNT_2" ]; then
 537                 zconf_mount $HOSTNAME $MOUNT2
 538         fi
 539
 540         restore_krb5_cred
 541 }
 542
 543 test_11() {
 544         local count
 545
 546         $LFS mkdir -i 0 -c $MDSCOUNT $DIR/$tdir ||
 547                 error "mkdir $DIR/$tdir failed"
 548         chmod 0777 $DIR/$tdir || error "chmod $DIR/$tdir failed"
 549         $RUNAS ls -ld $DIR/$tdir || error "ls -ld $DIR/$tdir failed"
 550         $RUNAS grep lgssc /proc/keys
 551         $RUNAS klist
 552
 553         # get rid of gss context and credentials for user
 554         $RUNAS $LFS flushctx -k -r $MOUNT || error "can't flush context (1)"
 555         $RUNAS grep lgssc /proc/keys
 556         $RUNAS klist
 557
 558         stack_trap exit_11 EXIT
 559         zconf_umount $HOSTNAME $MOUNT || error "umount $MOUNT failed"
 560         if [ "$MOUNT_2" ]; then
 561                 zconf_umount $HOSTNAME $MOUNT2 ||
 562                         error "umount $MOUNT2 failed"
 563         fi
 564         kdestroy
 565         klist
 566
 567         # we want KCM ccache
 568         cp /etc/krb5.conf /etc/krb5.conf.bkp
 569         stack_trap "/bin/mv /etc/krb5.conf.bkp /etc/krb5.conf" EXIT
 570         sed -i '1i default_ccache_name = KCM:' /etc/krb5.conf
 571         sed -i '1i [libdefaults]' /etc/krb5.conf
 572         zconf_mount $HOSTNAME $MOUNT || error "remount $MOUNT failed"
 573         klist
 574
 575         $RUNAS touch $DIR/$tdir/$tfile && error "write $tfile should fail"
 576         restore_krb5_cred
 577         $RUNAS klist
 578         $RUNAS touch $DIR/$tdir/$tfile || error "write $tfile failed"
 579         $RUNAS klist
 580         $RUNAS klist | grep -q lustre_mds || error "mds ticket not present"
 581
 582         $RUNAS $LFS flushctx -k -r $MOUNT || error "can't flush context (2)"
 583         kdestroy
 584 }
 585 run_test 11 "KCM ccache"
 586
#
# The following tests manipulate flavors and may finish with any flavor set,
# so no test should assume a particular starting flavor.
#
 591
 592 test_90() {
 593         if [ "$SLOW" = "no" ]; then
 594                 total=10
 595         else
 596                 total=60
 597         fi
 598
 599         mkdir $DIR/$tdir
 600
 601         restore_to_default_flavor
 602         set_flavor_all krb5p
 603
 604         start_dbench
 605
 606         for ((n = 1; n <= $total; n++)); do
 607                 sleep 2
 608                 check_dbench
 609                 echo "flush ctx ($n/$total) ..."
 610                 $LFS flushctx -k -r $MOUNT ||
 611                         error "can't flush context on $MOUNT"
 612         done
 613         check_dbench
 614         #sleep to let ctxs be re-established
 615         sleep 10
 616         stop_dbench
 617 }
 618 run_test 90 "recoverable from losing contexts under load"
 619
 620 test_99() {
 621         local nrule_old
 622         local nrule_new=0
 623         local max=32
 624
 625         #
 626         # general rules
 627         #
 628         nrule_old=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
 629             2>/dev/null | grep -c "$FSNAME.srpc.flavor.")
 630         echo "original general rules: $nrule_old"
 631
 632         for ((i = $nrule_old; i < $max; i++)); do
 633                 set_rule $FSNAME ${NETTYPE}$i cli2mdt krb5n ||
 634                         error "set rule $i (1)"
 635                 set_rule $FSNAME ${NETTYPE}$i cli2ost krb5n ||
 636                         error "set rule $i (2)"
 637                 set_rule $FSNAME ${NETTYPE}$i mdt2ost null ||
 638                         error "set rule $i (3)"
 639                 set_rule $FSNAME ${NETTYPE}$i mdt2mdt null ||
 640                         error "set rule $i (4)"
 641         done
 642         for ((i = $nrule_old; i < $max; i++)); do
 643                 set_rule $FSNAME ${NETTYPE}$i cli2mdt ||
 644                         error "remove rule $i (1)"
 645                 set_rule $FSNAME ${NETTYPE}$i cli2ost ||
 646                         error "remove rule $i (2)"
 647                 set_rule $FSNAME ${NETTYPE}$i mdt2ost ||
 648                         error "remove rule $i (3)"
 649                 set_rule $FSNAME ${NETTYPE}$i mdt2mdt ||
 650                         error "remove rule $i (4)"
 651
 652         done
 653
 654         nrule_new=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
 655             2>/dev/null | grep -c "$FSNAME.srpc.flavor.")
 656         if [ $nrule_new != $nrule_old ]; then
 657                 error "general rule: $nrule_new != $nrule_old"
 658         fi
 659
 660         #
 661         # target-specific rules
 662         #
 663         nrule_old=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
 664             2>/dev/null | grep -c "$FSNAME-MDT0000.srpc.flavor.")
 665         echo "original target rules: $nrule_old"
 666
 667         for ((i = $nrule_old; i < $max; i++)); do
 668                 set_rule $FSNAME-MDT0000 ${NETTYPE}$i cli2mdt krb5i ||
 669                         error "set new rule $i (1)"
 670                 set_rule $FSNAME-MDT0000 ${NETTYPE}$i mdt2ost null ||
 671                         error "set new rule $i (2)"
 672                 set_rule $FSNAME-MDT0000 ${NETTYPE}$i mdt2mdt null ||
 673                         error "set new rule $i (3)"
 674         done
 675         for ((i = $nrule_old; i < $max; i++)); do
 676                 set_rule $FSNAME-MDT0000 ${NETTYPE}$i cli2mdt ||
 677                         error "remove new rule $i (1)"
 678                 set_rule $FSNAME-MDT0000 ${NETTYPE}$i mdt2ost ||
 679                         error "remove new rule $i (2)"
 680                 set_rule $FSNAME-MDT0000 ${NETTYPE}$i mdt2mdt ||
 681                         error "remove new rule $i (3)"
 682         done
 683
 684         nrule_new=$(do_facet mgs lctl get_param -n mgs.MGS.live.$FSNAME \
 685             2>/dev/null \ | grep -c "$FSNAME-MDT0000.srpc.flavor.")
 686         if [ $nrule_new != $nrule_old ]; then
 687                 error "general rule: $nrule_new != $nrule_old"
 688         fi
 689 }
 690 run_test 99 "set large number of sptlrpc rules"
 691
 692 test_100() {
 693         # started from default flavors
 694         restore_to_default_flavor
 695
 696         mkdir $DIR/$tdir
 697
 698         # running dbench in background
 699         start_dbench
 700
 701         #
 702         # all: null -> krb5n -> krb5a -> krb5i -> krb5p
 703         #
 704         set_flavor_all krb5n
 705         check_dbench
 706
 707         set_flavor_all krb5a
 708         check_dbench
 709
 710         set_flavor_all krb5i
 711         check_dbench
 712
 713         set_flavor_all krb5p
 714         check_dbench
 715
 716         #
 717         # * - MDT0: krb5a
 718         # * - OST0: krb5i
 719         #
 720         # nothing should be changed because they are overridden by above rules
 721         #
 722         set_rule $FSNAME-MDT0000 any cli2mdt krb5a
 723         set_rule $FSNAME-OST0000 any cli2ost krb5i
 724         wait_flavor cli2mdt krb5p || error_dbench "1"
 725         check_dbench
 726         wait_flavor cli2ost krb5p || error_dbench "2"
 727
 728         #
 729         # remove:
 730         #  * - MDT0: krb5a
 731         #  * - OST0: krb5i
 732         #
 733         set_rule $FSNAME-MDT0000 any cli2mdt
 734         set_rule $FSNAME-OST0000 any cli2ost
 735         check_dbench
 736
 737         #
 738         # delete all rules
 739         #
 740         set_rule $FSNAME any mdt2mdt
 741         set_rule $FSNAME any cli2mdt
 742         set_rule $FSNAME any mdt2ost
 743         set_rule $FSNAME any cli2ost
 744         restore_to_default_flavor
 745         check_dbench
 746
 747         stop_dbench
 748 }
 749 run_test 100 "change security flavor on the fly under load"
 750
 751 switch_sec_test()
 752 {
 753         local flavor0=$1
 754         local flavor1=$2
 755         local filename=$DIR/$tfile
 756         local multiop_pid
 757         local num
 758
 759         #
 760         # after setting flavor0, start multiop which uses flavor0 rpc, and let
 761         # server drop the reply; then switch to flavor1, the resend should be
 762         # completed using flavor1. To exercise the code of switching ctx/sec
 763         # for a resend request.
 764         #
 765         log ">>>>>>>>>>>>>>> Testing $flavor0 -> $flavor1 <<<<<<<<<<<<<<<<<<<"
 766
 767         set_rule $FSNAME any cli2mdt $flavor0
 768         wait_flavor cli2mdt $flavor0
 769         rm -f $filename || error "remove old $filename failed"
 770
 771         #MDS_REINT = 36
 772         #define OBD_FAIL_PTLRPC_DROP_REQ_OPC     0x513
 773         do_facet $SINGLEMDS lctl set_param fail_val=36
 774         do_facet $SINGLEMDS lctl set_param fail_loc=0x513
 775         log "starting multiop"
 776         $MULTIOP $filename m &
 777         multiop_pid=$!
 778         echo "multiop pid=$multiop_pid"
 779         sleep 1
 780
 781         set_rule $FSNAME any cli2mdt $flavor1
 782         wait_flavor cli2mdt $flavor1
 783
 784         num=$(ps --no-headers -p $multiop_pid 2>/dev/null | wc -l)
 785         [ $num -eq 1 ] || error "multiop($multiop_pid) already ended ($num)"
 786         echo "process $multiop_pid is still hanging there... OK"
 787
 788         do_facet $SINGLEMDS lctl set_param fail_loc=0
 789         log "waiting for multiop ($multiop_pid) to finish"
 790         wait $multiop_pid || error "multiop returned error"
 791 }
 792
 793 test_101()
 794 {
 795         # started from default flavors
 796         restore_to_default_flavor
 797
 798         switch_sec_test null  krb5n
 799         switch_sec_test krb5n krb5a
 800         switch_sec_test krb5a krb5i
 801         switch_sec_test krb5i krb5p
 802         switch_sec_test krb5p null
 803 }
 804 run_test 101 "switch ctx/sec for resending request"
 805
 806 error_102()
 807 {
 808         local err_str=$1
 809
 810         killall -9 dbench
 811         sleep 1
 812
 813         error $err_str
 814 }
 815
 816 test_102() {
 817         # started from default flavors
 818         restore_to_default_flavor
 819
 820         mkdir $DIR/$tdir
 821
 822         # run dbench background
 823         start_dbench
 824
 825         echo "Testing null->krb5n->krb5a->krb5i->krb5p->null"
 826         set_flavor_all krb5n
 827         set_flavor_all krb5a
 828         set_flavor_all krb5i
 829         set_flavor_all krb5p
 830         set_flavor_all null
 831
 832         check_dbench
 833
 834         echo "waiting for 15s and check again"
 835         sleep 15
 836         check_dbench
 837
 838         echo "Testing null->krb5i->null->krb5i->null..."
 839         for ((idx = 0; idx < 5; idx++)); do
 840                 set_flavor_all krb5i
 841                 set_flavor_all null
 842         done
 843         set_flavor_all krb5i
 844
 845         check_dbench
 846
 847         echo "waiting for 15s and check again"
 848         sleep 15
 849         check_dbench
 850
 851         stop_dbench
 852 }
 853 run_test 102 "survive from fast flavor switch"
 854
 855 test_150() {
 856         local mount_opts
 857         local count
 858         local clients=$CLIENTS
 859
 860         [ -z $clients ] && clients=$HOSTNAME
 861
 862         # started from default flavors
 863         restore_to_default_flavor
 864
 865         # at this time no rules has been set on mgs; mgc use null
 866         # flavor to connect to mgs
 867         count=$(flvr_cnt_mgc2mgs null)
 868         [ $count -eq 1 ] || error "$count mgc connections use null flavor"
 869
 870         zconf_umount_clients $clients $MOUNT || error "umount failed (1)"
 871
 872         # mount client with conflict flavor - should fail
 873         mount_opts="${MOUNT_OPTS:+$MOUNT_OPTS,}mgssec=krb5p"
 874         zconf_mount_clients $clients $MOUNT $mount_opts &&
 875                 error "mount with conflict flavor should have failed"
 876
 877         # mount client with same flavor - should succeed
 878         mount_opts="${MOUNT_OPTS:+$MOUNT_OPTS,}mgssec=null"
 879         zconf_mount_clients $clients $MOUNT $mount_opts ||
 880                 error "mount with same flavor should have succeeded"
 881         zconf_umount_clients $clients $MOUNT || error "umount failed (2)"
 882
 883         # mount client with default flavor - should succeed
 884         zconf_mount_clients $clients $MOUNT ||
 885                 error "mount with default flavor should have succeeded"
 886 }
 887 run_test 150 "secure mgs connection: client flavor setting"
 888
 889 exit_151() {
 890         # remove mgs rule
 891         set_rule _mgs any any
 892
 893         # umount everything, then remount
 894         stopall
 895         setupall
 896 }
 897
 898 test_151() {
 899         local new_opts
 900
 901         stack_trap exit_151 EXIT
 902
 903         # set mgs rule to only accept krb5p
 904         set_rule _mgs any any krb5p
 905
 906         # umount everything, modules still loaded
 907         stopall
 908
 909         # start gss daemon on mgs node
 910         combined_mgs_mds || start_gss_daemons $mgs_HOST $LSVCGSSD "-vvv"
 911
 912         # start mgs
 913         start mgs $(mgsdevname 1) $MDS_MOUNT_OPTS
 914
 915         # mount with default flavor, expected to fail
 916         start ost1 "$(ostdevname 1)" $OST_MOUNT_OPTS
 917         wait_mgc_import_state ost1 FULL 0 &&
 918                 error "mount with default flavor should have failed"
 919         stop ost1
 920
 921         # mount with unauthorized flavor should fail
 922         if [ -z "$OST_MOUNT_OPTS" ]; then
 923                 new_opts="-o mgssec=null"
 924         else
 925                 new_opts="$OST_MOUNT_OPTS,mgssec=null"
 926         fi
 927         start ost1 "$(ostdevname 1)" $new_opts
 928         wait_mgc_import_state ost1 FULL 0 &&
 929                 error "mount with unauthorized flavor should have failed"
 930         stop ost1
 931
 932         # mount with designated flavor should succeed
 933         if [ -z "$OST_MOUNT_OPTS" ]; then
 934                 new_opts="-o mgssec=krb5p"
 935         else
 936                 new_opts="$OST_MOUNT_OPTS,mgssec=krb5p"
 937         fi
 938         start ost1 "$(ostdevname 1)" $new_opts
 939         wait_mgc_import_state ost1 FULL 0 ||
 940                 error "mount with designated flavor should have succeeded"
 941
 942         stop ost1 -f
 943 }
 944 run_test 151 "secure mgs connection: server flavor control"
 945
 946 exit_152() {
 947         zconf_umount $HOSTNAME $MOUNT
 948
 949         # remove mgs rule
 950         set_rule _mgs any any
 951
 952         zconf_mount $HOSTNAME $MOUNT
 953         if [ "$MOUNT_2" ]; then
 954                 zconf_mount $HOSTNAME $MOUNT2
 955         fi
 956 }
 957
 958 test_152() {
 959         local mount_opts
 960         local count
 961
 962         (( MDS1_VERSION >= $(version_code 2.15.64) )) ||
 963                 skip "Need MDS >= 2.15.64 for user context with MGS"
 964
 965         stack_trap exit_152 EXIT
 966
 967         if is_mounted $MOUNT2; then
 968                 umount_client $MOUNT2 || error "umount $MOUNT2 failed"
 969         fi
 970
 971         zconf_umount $HOSTNAME $MOUNT || error "umount $MOUNT failed"
 972
 973         # set mgs rule to only accept krb5p
 974         set_rule _mgs any any krb5p
 975
 976         # start gss daemon on mgs node
 977         combined_mgs_mds || start_gss_daemons $mgs_HOST $LSVCGSSD "-vvv"
 978
 979         # re-mount client with mgssec=krb5p
 980         mount_opts="${MOUNT_OPTS:+$MOUNT_OPTS,}mgssec=krb5p"
 981         zconf_mount $HOSTNAME $MOUNT $mount_opts ||
 982                 error "unable to mount client"
 983
 984         $RUNAS $LFS check mgts || error "check mgts as user failed"
 985         $RUNAS grep lgssc /proc/keys
 986
 987         $RUNAS $LFS flushctx $MOUNT || error "flushctx as user failed"
 988         $RUNAS grep lgssc /proc/keys
 989         count=$($RUNAS grep lgssc /proc/keys | grep -v "Running as" | wc -l)
 990         [[ $count == 0 ]] || error "remaining $count keys for user"
 991 }
 992 run_test 152 "secure mgs connection: user access"
 993
 994 test_200() {
 995         local nid=$(lctl list_nids | grep ${NETTYPE} | head -n1)
 996         local nidstr="peer_nid: ${nid},"
 997         local count
 998
 999         lfs df -h
1000         do_facet $SINGLEMDS $LCTL get_param -n \
1001                 mdt.*-MDT0000.gss.srpc_serverctx | grep "$nidstr"
1002         count=$(do_facet $SINGLEMDS $LCTL get_param -n \
1003                 mdt.*-MDT0000.gss.srpc_serverctx | grep "$nidstr" |
1004                 grep -c 'delta: -')
1005         echo "found $count expired reverse contexts (1)"
1006         # We can have up to 3 expired contexts in the normal case:
1007         # - the newest one, that is just about to be renewed
1008         # - the previous one that had expired
1009         # - the one currently referenced in the sec, not updated in the absence
1010         #   of client activity.
1011         (( count < 4 )) || error "expired reverse contexts should be <= 3 (1)"
1012
1013         # unmount to get rid of old context
1014         umount_client $MOUNT || error "umount $MOUNT failed"
1015         kdestroy
1016         stack_trap "mount_client $MOUNT ${MOUNT_OPTS} || true" EXIT
1017         if is_mounted $MOUNT2; then
1018                 umount_client $MOUNT2 || error "umount $MOUNT2 failed"
1019                 stack_trap "mount_client $MOUNT2 ${MOUNT_OPTS}" EXIT
1020         fi
1021
1022         # update ticket lifetime to be 45s
1023         stack_trap "/usr/bin/cp -f /etc/krb5.conf.bkp /etc/krb5.conf" EXIT
1024         sed -i.bkp s+[^#]ticket_lifetime.*+ticket_lifetime\ =\ 45s+ \
1025                 /etc/krb5.conf
1026         # establish new context, and wait 3x lifetime
1027         mount_client $MOUNT ${MOUNT_OPTS} || error "remount failed"
1028         lfs df -h
1029         sleep 135
1030         # re-activate connections, and look for reverse contexts on server side
1031         lfs df -h
1032         do_facet $SINGLEMDS $LCTL get_param -n \
1033                 mdt.*-MDT0000.gss.srpc_serverctx | grep "$nidstr"
1034         count=$(do_facet $SINGLEMDS $LCTL get_param -n \
1035                 mdt.*-MDT0000.gss.srpc_serverctx | grep "$nidstr" |
1036                 grep -c 'delta: -')
1037         echo "found $count expired reverse contexts (2)"
1038         (( count < 4 )) || error "expired reverse contexts should be <= 3 (2)"
1039 }
1040 run_test 200 "check expired reverse gss contexts"
1041
# Cleanup handler registered by test_201 through stack_trap: drop the
# client mounts and the ticket cache, put the saved /etc/krb5.conf back
# and mount the client(s) again.
#
# Globals read: MOUNT, MOUNT2, MOUNT_2, MOUNT_OPTS
cleanup_201() {
        local remount2=false

        # The second mount point has to come back when the double mount
        # is configured (MOUNT_2 non-empty) ...
        if [[ -n "$MOUNT_2" ]]; then
                remount2=true
        fi

        # unmount to get rid of old context
        umount_client $MOUNT
        kdestroy
        if is_mounted $MOUNT2; then
                umount_client $MOUNT2
                # ... or when it was mounted on entry.  This must be
                # recorded here: once unmounted, is_mounted can no longer
                # tell that $MOUNT2 needs to be restored.
                remount2=true
        fi

        # restore original krb5.conf, if a backup was actually created
        if [ -f /etc/krb5.conf.bkp ]; then
                cp -f /etc/krb5.conf.bkp /etc/krb5.conf
                rm -f /etc/krb5.conf.bkp
        fi

        # remount client
        mount_client $MOUNT ${MOUNT_OPTS} || error "mount $MOUNT failed"
        if $remount2; then
                mount_client $MOUNT2 ${MOUNT_OPTS} ||
                        error "mount $MOUNT2 failed"
        fi
}
1061
# Check that an ldlm callback still works once every reverse gss context
# of this client has expired on the MDS: both mounts are re-established
# with a 90s ticket lifetime, the second mount takes a lock from the
# first, and after the contexts have expired the first mount reclaims
# it; the client must not get evicted.
#
# Globals read: LCTL, LFS, NETTYPE, SINGLEMDS, HOSTNAME, MOUNT, MOUNT2,
#               MOUNT_OPTS, DIR, DIR2, tdir, tfile
# Side effects: unmounts/remounts the clients, destroys the current
#               ticket cache and edits /etc/krb5.conf (backup kept as
#               /etc/krb5.conf.bkp); cleanup_201 is registered through
#               stack_trap to undo this.
# Returns:      0 unless error() terminates the test first.
test_201() {
        local nid=$($LCTL list_nids | grep "${NETTYPE}" | head -n1)
        local nidstr="peer_nid: ${nid},"
        local ctx_param="mdt.*-MDT0000.gss.srpc_serverctx"
        local count

        $LFS df -h
        $LFS mkdir -i 0 -c 1 $DIR/$tdir || error "mkdir $DIR/$tdir failed"
        stack_trap cleanup_201 EXIT

        # unmount to get rid of old context
        umount_client $MOUNT || error "umount $MOUNT failed"
        kdestroy
        if is_mounted $MOUNT2; then
                umount_client $MOUNT2 || error "umount $MOUNT2 failed"
        fi

        # update ticket lifetime to be 90s
        # The sed script must be quoted: it contains '[', ']' and '*' and
        # would otherwise be subject to pathname expansion.
        sed -i.bkp 's+[^#]ticket_lifetime.*+ticket_lifetime = 90s+' \
                /etc/krb5.conf
        # establish new contexts
        mount_client $MOUNT ${MOUNT_OPTS} || error "remount failed"
        mount_client $MOUNT2 ${MOUNT_OPTS} || error "remount 2 failed"
        $LFS df -h

        # have ldlm lock on first mount
        touch $DIR/${tfile}_1
        stack_trap "rm -f $DIR/${tfile}*" EXIT
        # and make second mount take it
        touch $DIR2/$tdir/file001

        # wait lifetime + 30s to have expired contexts
        echo "Wait for gss contexts to expire... 120s"
        sleep 120

        # print the matching contexts for the log, then count those that
        # are still valid (no negative delta)
        do_facet $SINGLEMDS $LCTL get_param -n "$ctx_param" | grep "$nidstr"
        count=$(do_facet $SINGLEMDS $LCTL get_param -n "$ctx_param" |
                grep "$nidstr" | grep -vc 'delta: -')
        echo "found $count valid reverse contexts"
        (( count == 0 )) || error "all contexts should have expired"

        # make first mount reclaim ldlm lock
        touch $DIR/${tfile}_2
        $LFS df $MOUNT2
        # this should not evict the second mount
        if client_evicted $HOSTNAME; then
                error "client got evicted"
        fi

        # Report success with 'return', not 'exit': exiting here would
        # terminate the calling shell instead of handing control back to
        # whoever invoked the test function.
        return 0
}
1112 run_test 201 "allow expired ctx for ldlm callback"
1113
1114 complete_test $SECONDS
1115 set_flavor_all null
1116 cleanup_gss
1117 check_and_cleanup_lustre
1118 exit_status