From: Sebastien Buisson Date: Wed, 15 Nov 2023 10:22:13 +0000 (+0100) Subject: LU-17175 gss: start lsvcgssd from l_getauth X-Git-Tag: 2.15.62~118 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=414467762f8a034c72903bab8ebfce6e1feb8e79;hp=7e1fb1a296ec7ab21be7ec39e2b6a38fbca76b6c;p=fs%2Flustre-release.git LU-17175 gss: start lsvcgssd from l_getauth If l_getauth detects it cannot connect to the socket supposed to be opened by lsvcgssd, it tries to launch the daemon, with predefined default values. Test-Parameters: trivial Test-Parameters: kerberos=true testlist=sanity-krb5 Test-Parameters: testgroup=review-dne-selinux-ssk-part-2 Signed-off-by: Sebastien Buisson Change-Id: I3961ce0f548fb6ea23458edcb01a03fb8b3a617f Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53142 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Aurelien Degremont Reviewed-by: Oleg Drokin --- diff --git a/lustre/conf/lsvcgss b/lustre/conf/lsvcgss index 9f930e0..658a7c0 100644 --- a/lustre/conf/lsvcgss +++ b/lustre/conf/lsvcgss @@ -1,2 +1,3 @@ # Optional arguments passed to lsvcgssd. -LSVCGSSDARGS='' + +#LSVCGSSDARGS='' diff --git a/lustre/scripts/lsvcgss b/lustre/scripts/lsvcgss index 46fcfc9..0f294a2 100755 --- a/lustre/scripts/lsvcgss +++ b/lustre/scripts/lsvcgss @@ -10,9 +10,12 @@ . /etc/init.d/functions LOCKFILE="/var/lock/subsys/lsvcgssd" + +# If service is not configured, launch with all mechs # -k -- Enable kerberos support # -s -- Enable shared key support -LSVCGSSDARGS="-k -s" +# -z -- Enable gssnull support +LSVCGSSDARGS="-k -s -z" # Check for and source configuration file [ -f /etc/sysconfig/lsvcgss ] && . /etc/sysconfig/lsvcgss diff --git a/lustre/tests/sanity-krb5.sh b/lustre/tests/sanity-krb5.sh index 656fb2e..6ce9c6b 100755 --- a/lustre/tests/sanity-krb5.sh +++ b/lustre/tests/sanity-krb5.sh @@ -318,6 +318,7 @@ run_test 3 "local cache under DLM lock" test_5() { local file1=$DIR/$tdir/$tfile-1 local file2=$DIR/$tdir/$tfile-2 + local file3=$DIR/$tdir/$tfile-3 local wait_time=$((TIMEOUT + TIMEOUT / 2)) mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed" @@ -328,25 +329,44 @@ test_5() { [ -f $file1 ] || error "$file1 not found" # flush context - $RUNAS $LFS flushctx $MOUNT || error "can't flush context" + $RUNAS $LFS flushctx $MOUNT || error "can't flush context (1)" # stop lsvcgssd send_sigint $(comma_list $(mdts_nodes)) $LSVCGSSD sleep 5 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) $LSVCGSSD && - error "$LSVCGSSD still running" + error "$LSVCGSSD still running (1)" + + # daemon should restart automatically, at least on newer servers + $RUNAS touch $file2 + if [ $? -ne 0 ]; then + echo "$RUNAS touch $file2 failed" + (( MDS1_VERSION < $(version_code 2.15.61) )) || + error "$LSVCGSSD should restart automatically" + else + echo "$RUNAS touch $file2 succeeded" + fi + + # flush context + if (( MDS1_VERSION >= $(version_code 2.15.61) )); then + $RUNAS $LFS flushctx $MOUNT || error "can't flush context (2)" + fi - $RUNAS touch $file2 && error "should fail without $LSVCGSSD" + # stop lsvcgssd + send_sigint $(comma_list $(mdts_nodes)) $LSVCGSSD + sleep 5 + check_gss_daemon_nodes $(comma_list $(mdts_nodes)) $LSVCGSSD && + error "$LSVCGSSD still running (2)" # restart lsvcgssd, expect touch succeed echo "restart $LSVCGSSD and recovering" start_gss_daemons $(comma_list $(mdts_nodes)) $LSVCGSSD "-vvv" sleep 5 check_gss_daemon_nodes $(comma_list $(mdts_nodes)) $LSVCGSSD - $RUNAS touch $file2 || error "should not fail now" - [ -f $file2 ] || error "$file2 not found" + $RUNAS touch $file3 || error "should not fail now" + [ -f $file3 ] || error "$file3 not found" } -run_test 5 "lsvcgssd dead, operations fail" +run_test 5 "lsvcgssd dead, operations pass" test_6() { local nfile=10 diff --git a/lustre/utils/gss/l_getauth.c b/lustre/utils/gss/l_getauth.c index d1fecbe..f20be2e 100644 --- a/lustre/utils/gss/l_getauth.c +++ b/lustre/utils/gss/l_getauth.c @@ -13,6 +13,23 @@ #include "lsupport.h" #include "err_util.h" +static int start_daemon(char *auth_req) +{ + int rc; + + rc = system("/usr/bin/systemctl restart lsvcgss"); + if (rc < 0 || (errno = WEXITSTATUS(rc))) { + printerr(LL_ERR, "systemctl restart lsvcgss service failed: %s\n", + strerror(errno)); + rc = -errno; + } else { + printerr(LL_INFO, "lsvcgss service automatically restarted\n"); + rc = 0; + } + + return rc; +} + int main(int argc, char **argv) { int local_socket; @@ -20,7 +37,7 @@ int main(int argc, char **argv) ssize_t bytes_sent; char *auth_req = NULL, *cachename = NULL; ssize_t req_len; - int opt, debug = 0, rc = 0; + int opt, debug = 0, tried_daemon = 0, rc = 0; /* Parameters received from kernel (see rsi_do_upcall()): * -c -r -d @@ -44,7 +61,7 @@ int main(int argc, char **argv) break; case 'd': debug = 1; - goto connect; + goto socket; case 'r': auth_req = optarg; break; @@ -72,24 +89,35 @@ int main(int argc, char **argv) req_len = strlen(auth_req); -connect: +socket: /* Send auth request to lsvcgssd via a socket. */ local_socket = socket(AF_UNIX, SOCK_STREAM, 0); if (local_socket == -1) { - rc = -errno; - printerr(LL_ERR, "cannot create socket: %d\n", rc); - return rc; + printerr(LL_ERR, "cannot create socket: %d\n", -errno); + return EXIT_FAILURE; } memset(&addr, 0, sizeof(addr)); addr.sun_family = AF_UNIX; strncpy(addr.sun_path, GSS_SOCKET_PATH, sizeof(addr.sun_path) - 1); +connect: if (connect(local_socket, (struct sockaddr *)&addr, sizeof(addr)) == -1) { rc = -errno; printerr(LL_ERR, "cannot connect to socket: %d\n", rc); - goto out; + if (debug || tried_daemon == 5) + goto out; + if (!tried_daemon) { + rc = start_daemon(auth_req); + if (rc) { + rc = -1; + goto out; + } + sleep(2); + } + tried_daemon++; + goto connect; } if (debug) @@ -107,5 +135,5 @@ connect: out: close(local_socket); - return rc; + return rc < 0 ? EXIT_FAILURE : 0; }