From 653066790ba8228ee20cf0dd0f6e2646bbde712a Mon Sep 17 00:00:00 2001 From: "John L. Hammond" Date: Sun, 15 Aug 2021 22:14:39 -0500 Subject: [PATCH] EX-3658 utils: add pumount Add a utility ('pumount') to lazily unmount a filesystem and kill remaining users. Add a test script (sanity-pumount.sh). Test-Parameters: testlist=sanity-pumount clientextra_install_params="--packages pumount" Signed-off-by: John L. Hammond Change-Id: Iaa937d51ca0003c92d1d63608e5d0f4f67ca92fb Reviewed-on: https://review.whamcloud.com/44736 Tested-by: jenkins --- .gitignore | 1 + autoMakefile.am | 2 + config/lustre-build.m4 | 20 +- contrib/lbuild/lbuild | 1 + lustre.spec.in | 24 + lustre/tests/Makefile.am | 1 + lustre/tests/sanity-pumount.sh | 684 +++++++++++++++++++++++++ pumount/.gitignore | 2 + pumount/Makefile.am | 7 + pumount/README | 78 +++ pumount/pumount.c | 1091 ++++++++++++++++++++++++++++++++++++++++ 11 files changed, 1910 insertions(+), 1 deletion(-) create mode 100755 lustre/tests/sanity-pumount.sh create mode 100644 pumount/.gitignore create mode 100644 pumount/Makefile.am create mode 100644 pumount/README create mode 100644 pumount/pumount.c diff --git a/.gitignore b/.gitignore index d9d43e0..4ea9363 100644 --- a/.gitignore +++ b/.gitignore @@ -82,6 +82,7 @@ TAGS /lustre*.tar.gz /lustre*.rpm /kmod-lustre*.rpm +/pumount*.rpm /kmp-*.files /kmp-*.preamble /mkinstalldirs diff --git a/autoMakefile.am b/autoMakefile.am index 16c7a3e..7a27b67 100644 --- a/autoMakefile.am +++ b/autoMakefile.am @@ -2,6 +2,7 @@ SUBDIRS := @LDISKFS_SUBDIR@ \ . 
\ @LUSTREIOKIT_SUBDIR@ \ libcfs \ + @PUMOUNT_SUBDIR@ \ @SNMP_SUBDIR@ \ lnet \ lustre @@ -9,6 +10,7 @@ SUBDIRS := @LDISKFS_SUBDIR@ \ DIST_SUBDIRS := ldiskfs \ lustre-iokit \ libcfs \ + @PUMOUNT_SUBDIR@ \ @SNMP_DIST_SUBDIR@ \ lnet \ lustre \ diff --git a/config/lustre-build.m4 b/config/lustre-build.m4 index 8ab2144..07b7ce1 100644 --- a/config/lustre-build.m4 +++ b/config/lustre-build.m4 @@ -128,6 +128,23 @@ AS_IF([test "x$with_libmount" = xyes], [ ]) # LB_LIBMOUNT # +# LB_PATH_PUMOUNT +# +AC_DEFUN([LB_PATH_PUMOUNT], [ +AC_MSG_CHECKING([whether to build pumount]) +AC_ARG_ENABLE([pumount], + AC_HELP_STRING([--disable-pumount], + [disable pumount (default is enable)]), + [], [enable_pumount="yes"]) +AC_MSG_RESULT([$enable_pumount]) +AS_IF([test "x$enable_pumount" = xyes], + [PUMOUNT_SUBDIR="pumount"], + [PUMOUNT_SUBDIR=""]) +AC_SUBST(PUMOUNT_SUBDIR) +AM_CONDITIONAL([BUILD_PUMOUNT], [test "x$enable_pumount" = xyes]) +]) # LB_PATH_PUMOUNT + +# # LB_PATH_SNMP # # check for in-tree snmp support @@ -468,6 +485,7 @@ AC_DEFUN([LB_CONFIG_FILES], [ lustre-iokit/mds-survey/Makefile lustre-iokit/ior-survey/Makefile lustre-iokit/stats-collect/Makefile + pumount/Makefile ) ]) @@ -677,7 +695,7 @@ LC_QUOTA LB_LIBMOUNT LB_PATH_SNMP LB_PATH_LUSTREIOKIT - +LB_PATH_PUMOUNT LB_DEFINE_E2FSPROGS_NAMES LIBCFS_CONFIGURE diff --git a/contrib/lbuild/lbuild b/contrib/lbuild/lbuild index 7c9a816..6c1607a 100755 --- a/contrib/lbuild/lbuild +++ b/contrib/lbuild/lbuild @@ -1009,6 +1009,7 @@ build_lustre() { # move RPMs into place where they are expected to be mv -f *lustre*.${TARGET_ARCH}.rpm $TOPDIR/RPMS/${TARGET_ARCH}/ + mv -f pumount-*.rpm $TOPDIR/RPMS/${TARGET_ARCH}/ || true # Not always built mv -f lustre-*.src.rpm $TOPDIR/SRPMS/ popd >/dev/null diff --git a/lustre.spec.in b/lustre.spec.in index 2b10833..6741d58 100644 --- a/lustre.spec.in +++ b/lustre.spec.in @@ -8,6 +8,11 @@ %bcond_without lustre_utils %bcond_without lustre_iokit %bcond_without lustre_modules +%if 0%{?rhel} == 6 +%bcond_with 
pumount +%else +%bcond_without pumount +%endif %bcond_with snmp %bcond_with gss %bcond_with gss_keyring @@ -434,6 +439,15 @@ simulate MDT service threads) locally on the MDS node, and does not need Lustre clients in order to run %endif +%if %{with pumount} +%package -n pumount +Summary: Lazily unmount a filesystem and kill remaining users +Group: System Environment/Base + +%description -n pumount +A utility to lazily unmount a filesystem and kill remaining users. +%endif # with pumount + %if 0%{?suse_version} %debug_package %endif @@ -633,6 +647,7 @@ fi %{!?with_shared:--disable-shared} \ %{!?with_static:--disable-static} \ %{!?with_lustre_iokit:--disable-iokit} \ + %{!?with_pumount:--disable-pumount} \ %{!?with_ldiskfs:--disable-ldiskfs} \ %{!?with_servers:--disable-server} \ %{!?with_zfs:--without-zfs} \ @@ -847,6 +862,9 @@ fi %if %{with zfs} %exclude %{_sbindir}/zfsobj2fid %endif +%if %{with pumount} +%exclude %{_sbindir}/pumount +%endif %if %{with lustre_utils} %if %{with servers} %{_libexecdir}/lustre/lc_common @@ -946,6 +964,12 @@ fi %doc lustre-iokit/stats-collect/README.iokit-lstats %endif +%if %{with pumount} +%files -n pumount +%defattr(-, root, root) +%{_sbindir}/pumount +%endif # with pumount + %post %if %{with systemd} %systemd_post lnet.service diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 91381ce..cb3128e 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -39,6 +39,7 @@ noinst_SCRIPTS += sanity-lfsck.sh lfsck-performance.sh sanity-lipe.sh noinst_SCRIPTS += resolveip noinst_SCRIPTS += sanity-hsm.sh sanity-lsnapshot.sh sanity-pfl.sh sanity-flr.sh noinst_SCRIPTS += sanity-dom.sh sanity-pcc.sh dom-performance.sh sanity-lnet.sh +noinst_SCRIPTS += sanity-pumount.sh noinst_SCRIPTS += hot-pools.sh nobase_noinst_SCRIPTS = cfg/local.sh nobase_noinst_SCRIPTS += test-groups/regression test-groups/regression-mpi diff --git a/lustre/tests/sanity-pumount.sh b/lustre/tests/sanity-pumount.sh new file mode 100755 index 
0000000..3768c46 --- /dev/null +++ b/lustre/tests/sanity-pumount.sh @@ -0,0 +1,684 @@ +#!/bin/bash + +set -e + +ONLY=${ONLY:-"$*"} +ALWAYS_EXCEPT="$SANITY_PUMOUNT_EXCEPT" +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! + +SRCDIR=$(dirname $0) +PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH + +CHECKSTAT=${CHECKSTAT:-"checkstat -v"} +OPENFILE=${OPENFILE:-openfile} +OPENUNLINK=${OPENUNLINK:-openunlink} +MULTIOP=${MULTIOP:-multiop} +MOUNT_2=${MOUNT_2:-"yes"} +SAVE_PWD=$PWD + +export TMP=${TMP:-/tmp} +export NAME=${NAME:-local} + +LUSTRE=${LUSTRE:-`dirname $0`/..} +. $LUSTRE/tests/test-framework.sh +CLEANUP=${CLEANUP:-:} +SETUP=${SETUP:-:} +init_test_env "$@" +. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +get_lustre_env +init_logging + +FAIL_ON_ERROR=false +SETUP=${SETUP:-:} +TRACE=${TRACE:-""} + +check_and_setup_lustre + +assert_DIR +rm -rf $DIR1/[df][0-9]* $DIR1/lnk $DIR/[df].${TESTSUITE}* + +type pumount || skip_env "pumount is not installed" + +# $RUNAS_ID may get set incorrectly somewhere else +[ $UID -eq 0 -a $RUNAS_ID -eq 0 ] && error "\$RUNAS_ID set to 0, but \$UID is also 0!" + +check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS + +build_test_filter + +declare -a MOUNT_LIST=("$MOUNT1" "$MOUNT2") +declare -A MOUNT_NAME # Array mapping $MOUNTx to $(lfs getname $MOUNTx) +declare -A SAVED_MOUNT +declare -r SIGKILL=9 +declare -r SIGUSR1=10 +declare -r SIGUSR2=12 +declare -r SIGTERM=15 +declare -r SIGCHLD=17 # Ignored by default. 
+declare SLEEP_TIME=60 + +function sanity_pumount_init() { + local mount + + for mount in "${MOUNT_LIST[@]}"; do + if mountpoint --quiet "$mount"; then + SAVED_MOUNT[$mount]=true + fi + + umount_client "$mount" || true + mount_client "$mount" + done +} + +function sanity_pumount_fini() { + local mount + + for mount in "${MOUNT_LIST[@]}"; do + umount_client "$mount" || true + + if ${SAVED_MOUNT[$mount]:-false}; then + mount_client "$mount" + fi + done +} + +function init_pumount_env() { + local mount + + for mount in "${MOUNT_LIST[@]}"; do + mount_client "$mount" + + MOUNT_NAME["$mount"]=$($LFS getname "$mount") + done +} + +function check_mounted() { + local mount + + for mount in "$@"; do + mountpoint --quiet "$mount" || error "'$mount' not mounted" + done +} + +function check_unmount_complete() { + local mount="$1" + + # Check if lazy umount of $mount is complete. + # + # When umount2($mount, MNT_DETACH) returns + # 1. The mount at $mount will be detached from the namespace. + # 2. $mount will no longer appear in /proc/[pid]/mountinfo for any pid. + # + # *But* the llite instance param remains after the lazy umount + # command returns and exists until the last reference is + # dropped and the umount completes. + + # mountpoint uses /proc/self/mountinfo + mountpoint --quiet "$mount" && error "'$mount' still mounted" + $LCTL list_param "llite.${MOUNT_NAME[$mount]}" 2> /dev/null && + error "unmount of '$mount' did not complete" + + return 0 +} + +function wait_signaled() { + local pid=$1 + local sig=$2 + local status + + wait "$pid" + status=$? + + ((status == 128 + sig)) || error "pid $pid terminated with status $status, expected $((128 + sig))" +} + +function kill_wait_signaled() { + local pid=$1 + local sig_to_send=$2 + local sig_to_expect=$3 + local status + + # Send sig_to_send to PID but check that it already terminated by sig_to_expect. + kill "-${sig_to_send}" "${pid}" # May fail. + wait "$pid" + status=$? 
+ + ((status == 128 + sig_to_expect)) || + error "pid $pid terminated with status $status, expected $((128 + sig_to_expect))" +} + +sanity_pumount_init + +test_20a() { + init_pumount_env + + pumount "$MOUNT1" || error "pumount failed" + check_unmount_complete "$MOUNT1" + check_mounted "$MOUNT2" +} +run_test 20a "pumount works" + +test_20b() { + init_pumount_env + umount_client "$MOUNT2" + if pumount "$MOUNT2"; then + error "pumount should fail" + fi +} +run_test 20b "pumount fails on non mount point" + +test_20c() { + local option + + for option in --dry-run --no-open --no-signal --no-umount --print; do + init_pumount_env + umount_client "$MOUNT2" + + if pumount $option "$MOUNT2"; then + error "pumount $opt should fail" + fi + done +} +run_test 20c "pumount with options fails on non mount point" + +test_21a() { + local file=$DIR/$tfile + local pid + + init_pumount_env + + echo ZZZ > "$file" + $MULTIOP "$file" o_c & + pid=$! + sleep 5 + + pumount "$MOUNT1" || error "pumount failed" + wait_signaled $pid $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 21a "pumount works with an open file (multiop)" + +test_22a() { + local file=$DIR/$tfile + local pid + + init_pumount_env + + dd if=/dev/zero of="$file" bs=4K count=1 + $MULTIOP "$file" OsM_c & + pid=$! + sleep 5 + + pumount "$MOUNT1" || error "pumount failed" + wait_signaled $pid $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 22a "pumount works with an mmapped file (multiop)" + +test_22b() { + local file=$DIR/$tfile + local pid + + init_pumount_env + + dd if=/dev/zero of="$file" bs=4K count=1 + $MULTIOP "$file" OsM_c & + pid=$! + sleep 5 + rm "$file" + + pumount "$MOUNT1" || error "pumount failed" + wait_signaled $pid $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 22b "pumount works with a deleted mmapped file (multiop)" + +test_30a() { + local file=$DIR/$tfile + local fd + local pid + + init_pumount_env + + # This is less racy than 'sleep $SLEEP_TIME > $file &'. 
+ exec {fd}>"$file" + sleep $SLEEP_TIME & + pid=$! + exec {fd}>&- + + pumount "$MOUNT1" || error "pumount failed" + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 30a "pumount works with an open file (2)" + +test_30b() { + local file=$DIR/$tfile + local fd + local pid + + init_pumount_env + + # This is less racy than 'sleep $SLEEP_TIME > $file &'. + exec {fd}>"$file" + sleep $SLEEP_TIME & + pid=$! + exec {fd}>&- + + pumount --no-open "$MOUNT1" || error "pumount failed" + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 30b "pumount --no-open works with an open file (2)" + +test_31a() { + local file=$DIR/$tfile + local fd + local pid + + init_pumount_env + + exec {fd}>"$file" + sleep $SLEEP_TIME & + pid=$! + exec {fd}>&- + rm "$file" + + pumount "$MOUNT1" || error "pumount failed" + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 31a "pumount works with a deleted open file" + +test_31b() { + local file=$DIR/$tfile + local fd + local pid + + init_pumount_env + + exec {fd}>"$file" + sleep $SLEEP_TIME & + pid=$! + exec {fd}>&- + mv "$file" "$file-1" + + pumount "$MOUNT1" || error "pumount failed" + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 31b "pumount works with a renamed open file" + +test_31c() { + local dir="$DIR/$tdir" + local fd + local pid + + init_pumount_env + + mkdir "$dir" + exec {fd}<"$dir" + sleep $SLEEP_TIME & + pid=$! + exec {fd}<&- + + pumount "$MOUNT1" || error "pumount failed" + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 31c "pumount works with an open directory" + +test_31d() { + local dir="$DIR/$tdir" + local fd + local pid + + init_pumount_env + + mkdir "$dir" + exec {fd}<"$dir" + sleep $SLEEP_TIME & + pid=$! 
+ exec {fd}<&- + rmdir "$dir" + + pumount "$MOUNT1" || error "pumount failed" + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 31d "pumount works with a removed open directory" + +test_31e() { + local file="$DIR/$tfile" + local fd + local pid + + init_pumount_env + + mkfifo "$file" + exec {fd}<>"$file" + sleep $SLEEP_TIME & + pid=$! + exec {fd}>&- + + pumount "$MOUNT1" || error "pumount failed" + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 31e "pumount works with a fifo" + +test_32a() { + local sleep=$DIR/sleep + local pid + + init_pumount_env + cp /usr/bin/sleep $sleep + + $sleep $SLEEP_TIME & + pid=$! + + pumount "$MOUNT1" || error "pumount failed" + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 32a "pumount works with an executable" + +test_32b() { + local sleep=$DIR/sleep + local pid + + init_pumount_env + cp /usr/bin/sleep $sleep + + $sleep $SLEEP_TIME & + pid=$! + rm $sleep + + pumount "$MOUNT1" || error "pumount failed" + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 32b "pumount works with a deleted executable" + +test_32c() { + local sleep=$DIR/sleep + local pid + + init_pumount_env + cp /usr/bin/sleep $sleep + + $sleep $SLEEP_TIME & + pid=$! + mv $sleep $sleep-1 + + pumount "$MOUNT1" || error "pumount failed" + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 32c "pumount works with a renamed executable" + +test_33a() { + local pid + + init_pumount_env + + pushd $DIR + sleep $SLEEP_TIME & + pid=$! 
+ popd + + pumount "$MOUNT1" || error "pumount failed" + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 33a "pumount works on CWDs" + +# TODO run_test XX "pumount works with a chroot" + +test_50a() { + pumount --help || error "pumount --help failed" +} +run_test 50a "pumount --help works" + +test_50b() { + pumount --version || errro "pumount --version failed" +} +run_test 50b "pumount --version works" + +test_51a() { + local file=$DIR/$tfile + local fd + local pid + + init_pumount_env + + exec {fd}>"$file" + sleep $SLEEP_TIME & + pid=$! + exec {fd}>&- + + pumount --dry-run "$MOUNT1" | + awk -v pid=$pid '$2 == pid' | grep . || + error "dry-run did not show PID $pid" + + kill_wait_signaled $pid $SIGUSR2 $SIGUSR2 + check_mounted "$MOUNT1" +} +run_test 51a "pumount --dry-run works" + +test_52a() { + local file=$DIR/$tfile + local fd + local pid + + init_pumount_env + + exec {fd}>"$file" + sleep $SLEEP_TIME & + pid=$! + exec {fd}>&- + + pumount --force "$MOUNT1" || error "pumount --force failed" + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 52a "pumount --force works" + +test_53a() { + local file=$DIR/$tfile + local fd + local pid + + init_pumount_env + + exec {fd}>"$file" + sleep $SLEEP_TIME & + pid=$! + exec {fd}>&- + + pumount --no-signal "$MOUNT1" || error "pumount --no-signal failed" + kill_wait_signaled $pid $SIGUSR2 $SIGUSR2 + check_unmount_complete "$MOUNT1" +} +run_test 53a "pumount --no-signal works" + +test_54a() { + local file=$DIR/$tfile + local fd + local pid + + init_pumount_env + + exec {fd}>"$file" + sleep $SLEEP_TIME & + pid=$! + exec {fd}>&- + + pumount --no-umount "$MOUNT1" || error "pumount --no-umount failed" + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_mounted "$MOUNT1" +} +run_test 54a "pumount --no-umount works" + +test_55a() { + local file=$DIR/$tfile + local fd + local pid + + init_pumount_env + + exec {fd}>"$file" + sleep $SLEEP_TIME & + pid=$! 
+ exec {fd}>&- + sleep 1 + + # pumount --print fields + # COMM=1 PID=2 REF=3 HANDLE_TYPE=4 HANDLE=5 PATH=6 + # REF is exe, cwd, rtd, map, or an FD + pumount --print "$MOUNT1" | awk '$1 == "sleep"' | grep . || + error "pumount --print COMM != SLEEP'" + + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 55a "pumount --print shows the right comm" + +test_55b() { + local file=$DIR/$tfile + local fd + local pid + + init_pumount_env + + exec {fd}>"$file" + sleep $SLEEP_TIME & + pid=$! + exec {fd}>&- + sleep 1 + + pumount --print "$MOUNT1" | awk -v pid=$pid '$2 == pid' | grep . || + error "pumount --print PID != $pid'" + + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 55b "pumount --print shows the right PID" + +test_55c() { + local file=$DIR/$tfile + local fd + local pid + + init_pumount_env + + exec {fd}>"$file" + sleep $SLEEP_TIME & + pid=$! + exec {fd}>&- + sleep 1 + + pumount --print "$MOUNT1" | awk -v fd="$fd" '$3 == fd' | grep . || + error "pumount --print FD != $fd" + + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 55c "pumount --print shows the right FD" + +test_55d() { + local file=$DIR/$tfile + local fd + local fid + local pid + + init_pumount_env + + exec {fd}>"$file" + fid=$($LFS path2fid "$file") + sleep $SLEEP_TIME & + pid=$! + exec {fd}>&- + sleep 1 + + pumount --print "$MOUNT1" | awk -v fid="$fid" '$4 == "lustre" && $5 == fid' | grep . || + error "pumount --print HANDLE_TYPE != 'lustre' or FID != '$fid'" + + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 55d "pumount --print shows the right handle type and FID" + +test_55e() { + local file=$DIR/$tfile + local fd + local pid + + init_pumount_env + + exec {fd}>"$file" + sleep $SLEEP_TIME & + pid=$! 
+ exec {fd}>&- + sleep 1 + + # XXX path changes after umount2(..., MNT_DETACH) from '/mnt/$FSNAME/$tfile' to '/$tfile' + pumount --print "$MOUNT1" | awk -v path="/$tfile" '$6 == path' | grep . || + error "pumount --print PATH != '/$tfile'" + + kill_wait_signaled $pid $SIGUSR2 $SIGKILL + check_unmount_complete "$MOUNT1" +} +run_test 55e "pumount --print shows the right path (sort of)" + +test_56a() { + local file=$DIR/$tfile + local fd + local pid + + for sig in SIGTERM sigterm TERM term $SIGTERM; do + init_pumount_env + + exec {fd}>"$file" + sleep $SLEEP_TIME & + pid=$! + exec {fd}>&- + + pumount --signal=$sig "$MOUNT1" || error "pumount --signal failed" + kill_wait_signaled $pid $SIGUSR2 $SIGTERM + check_unmount_complete "$MOUNT1" + done +} +run_test 56a "pumount --signal works" + +test_57a() { + local file=$DIR/$tfile + local fd + local pid + + init_pumount_env + + exec {fd}>"$file" + sleep $SLEEP_TIME & + pid=$! + exec {fd}>&- + + ! pumount --signal=$SIGCHLD "$MOUNT1" || error "pumount should fail" + ! 
mountpoint --quiet "$MOUNT1" || error "'$MOUNT1' still in mountinfo" + kill_wait_signaled $pid $SIGTERM $SIGTERM + check_unmount_complete "$MOUNT1" +} +run_test 57a "pumount fails when processes remain" + +log "cleanup: ======================================================" + +sanity_pumount_fini + +# kill and wait in each test only guarantee script finish, but command in script +# like 'rm' 'chmod' may still be running, wait for all commands to finish +# otherwise umount below will fail +[ "$(mount | grep $MOUNT2)" ] && wait_update $HOSTNAME "fuser -m $MOUNT2" "" || + true + +complete $SECONDS +check_and_cleanup_lustre +exit_status diff --git a/pumount/.gitignore b/pumount/.gitignore new file mode 100644 index 0000000..03718b3 --- /dev/null +++ b/pumount/.gitignore @@ -0,0 +1,2 @@ +pumount +Makefile.in diff --git a/pumount/Makefile.am b/pumount/Makefile.am new file mode 100644 index 0000000..468e3ed --- /dev/null +++ b/pumount/Makefile.am @@ -0,0 +1,7 @@ +sbin_PROGRAMS = pumount +pumount_SOURCES = pumount.c +pumount_CPPFLAGS := -D_GNU_SOURCE +pumount_CFLAGS := -g -Wall -Werror +pumount_LDFLAGS := +pumount_LDADD := +LIBS = # Don't want autocrud LIBS. diff --git a/pumount/README b/pumount/README new file mode 100644 index 0000000..fe319a6 --- /dev/null +++ b/pumount/README @@ -0,0 +1,78 @@ +Usage: pumount [OPTION]... DIRECTORY +Lazily unmount the filesystem (FS) mounted at DIRECTORY and signal any +processes still using FS. + +Mandatory arguments to long options are mandatory for short options too. 
+ --debug enable debugging output + --dry-run equivalent to '--no-signal --no-umount --print' + -f, --force force unmount FS + --h, --help print this help message and exit + --no-open do not open DIRECTORY + --no-signal do not send signals + --no-umount do not unmount FS + -p, --print print users of FS + --scan-count=COUNT scan for users at most COUNT times + --signal=SIG send SIG to users of FS (default SIGKILL) + --version print version information and exit + +Operation: +1. Open '/proc' for scanning. +2. Open DIRECTORY and use name_to_handle_at() to get mount id of FS + (unless --no-open). +3. Start a lazy unmount of DIRECTORY (unless --no-umount). +4. Scan '/proc' for processes using FS. For each process directory + ('/proc/[pid]') other than its own, pumount will: + a. Open the process directory. + b. Determine if the process is using any files belonging to FS. + + pumount detects files belonging to FS by calling + name_to_handle_at() on the magical procfs symlinks + (/proc/[pid]/{exe,cwd,root}, /proc/[pid]/fd/[fd] and + /proc/[pid]/map_files/[range]) to get the mount id of the + file. Note that pumount does not access the files used by + the process. In particular, and unlike lsof, pumount does not + stat files. + + If printing (--print) is enabled then pumount prints the uses + of files belonging to FS in an lsof-like format. In this case + readlinkat() will be called on the procfs symlinks of used + files to expand their paths. + + c. If the process is using FS then send SIG to it (unless --no-signal). + d. Close the process directory. + +5. Repeat the scan (step 4) at most COUNT times, stopping if a scan + completed with no uses of FS detected. +6. Close the DIRECTORY file descriptor. +7. Close the '/proc' file descriptor. + +Notes: + pumount avoids mount id reuse races by holding DIRECTORY open until + step 6. Similarly, pumount avoids PID reuse races by holding the + process directory open across 4b and 4c. 
+ + When DIRECTORY is a Lustre client mount point, no RPCs will be sent + and no LDLM locks will be acquired on behalf of the actions in 4b. + +Examples: +Unmount the client mounted at /lustre/fs0a12/client/ and kill remaining +processes using it: +# pumount /lustre/fs0a12/client/ + +Print the processes using /lustre/fs0a12/client: +# pumount --dry-run /lustre/fs0a12/client/ +bash 8142 cwd lustre [0x200000404:0x5:0x0] /lustre/fs0a12/client/home/eggbert +eggrep 8446 cwd lustre [0x200000404:0x5:0x0] /lustre/fs0a12/client/home/eggbert +eggrep 8446 txt lustre [0x200000404:0x7:0x0] /lustre/fs0a12/client/home/eggbert/eggrep +eggrep 8446 map lustre [0x200000404:0x7:0x0] /lustre/fs0a12/client/home/eggbert/eggrep +eggrep 8446 map lustre [0x200000404:0x7:0x0] /lustre/fs0a12/client/home/eggbert/eggrep +eggrep 8446 map lustre [0x200000404:0x7:0x0] /lustre/fs0a12/client/home/eggbert/eggrep +eggrep 8446 0 lustre [0x200000404:0x8:0x0] /lustre/fs0a12/client/home/eggbert/EGGDATA.csv +eggrep 8446 1 lustre [0x200000404:0x6:0x0] /lustre/fs0a12/client/home/eggbert/results.txt +eggrep 8446 2 lustre [0x200000404:0x9:0x0] /lustre/fs0a12/client/home/eggbert/results.err +python 8942 0 lustre [0x200000404:0x3:0x0] /lustre/fs0a12/client/projects/EGGS\040V7\040FINAL.xls + +Exit status: + 0 Unmount succeeded and no users remained. + 1 Unmount failed or users remained or other error. + 2 An invalid option or argument was supplied. 
diff --git a/pumount/pumount.c b/pumount/pumount.c new file mode 100644 index 0000000..72fbf58 --- /dev/null +++ b/pumount/pumount.c @@ -0,0 +1,1091 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef VERSION +# define PU_VERSION VERSION +#else +# define PU_VERSION "0.0.42" +#endif + +static const char pu_program_name[] = "pumount"; +static const char pu_version[] = PU_VERSION; +static bool pu_debug; + +#define ARRAY_SIZE(a) ((sizeof(a)) / (sizeof((a)[0]))) + +#define PU_DEBUG(fmt, args...) \ + do { \ + if (pu_debug) \ + fprintf(stderr, "DEBUG %s:%d: "fmt, __func__, __LINE__, ##args); \ + } while (0) + +#define PU_DEBUG_B(x) PU_DEBUG("%s = %s\n", #x, (x) ? "true" : "false") +#define PU_DEBUG_D(x) PU_DEBUG("%s = %"PRIdMAX"\n", #x, (intmax_t)x) +#define PU_DEBUG_P(x) PU_DEBUG("%s = %p\n", #x, x) +#define PU_DEBUG_S(x) PU_DEBUG("%s = '%s'\n", #x, x) +#define PU_DEBUG_U(x) PU_DEBUG("%s = %"PRIuMAX"\n", #x, (uintmax_t)x) +#define PU_DEBUG_X(x) PU_DEBUG("%s = %"PRIxMAX"\n", #x, (uintmax_t)x) + +#define PU_ERROR(fmt, args...) \ + fprintf(stderr, "%s: "fmt, pu_program_name, ##args) + +#define PU_FATAL(fmt, args...) \ + do { \ + PU_ERROR("fatal: "fmt, ##args); \ + exit(EXIT_FAILURE); \ + } while (0) + +#define OOM() \ + PU_FATAL("out of memory\n"); + +#define TRY_HELP_1() \ + do { \ + fprintf(stderr, \ + "Try '%s --help' for more information.\n", \ + pu_program_name); \ + exit(2); \ + } while (0) + +#define TRY_HELP(fmt, args...) 
\ + do { \ + fprintf(stderr, "%s: "fmt, \ + pu_program_name, \ + ##args); \ + TRY_HELP_1(); \ + } while (0) + +static int pu_strtoi(const char *str, int *pval, int min, int max) +{ + long long val; + char *end = NULL; + + errno = 0; + val = strtoll(str, &end, 10); + if (errno != 0 || *end != '\0' || val != (int)val) + return -EINVAL; + + if (!(min <= val && val <= max)) + return -EINVAL; + + *pval = val; + + return 0; +} + +static const char *const PU_SIGNAL_NAMES[] = { +#define X(sig) [sig] = #sig + /* kill -l | tr '[[:space:]]' '\n' | grep -v SIGRT | sed --quiet '/^SIG[A-Z0-9]*$/ s/\(.*\)/X(\1),/p' */ + X(SIGHUP), + X(SIGINT), + X(SIGQUIT), + X(SIGILL), + X(SIGTRAP), + X(SIGABRT), + X(SIGBUS), + X(SIGFPE), + X(SIGKILL), + X(SIGUSR1), + X(SIGSEGV), + X(SIGUSR2), + X(SIGPIPE), + X(SIGALRM), + X(SIGTERM), + X(SIGSTKFLT), + X(SIGCHLD), + X(SIGCONT), + X(SIGSTOP), + X(SIGTSTP), + X(SIGTTIN), + X(SIGTTOU), + X(SIGURG), + X(SIGXCPU), + X(SIGXFSZ), + X(SIGVTALRM), + X(SIGPROF), + X(SIGWINCH), + X(SIGIO), + X(SIGPWR), + X(SIGSYS), +#undef X +}; + +/* Parse a signal, accepting the same formats as /bin/kill: numeric as + * well as names with or without the SIG prefix in upper or lower case + * (9, SIGKILL, KILL, sigkill, kill). */ +static int pu_str_to_signal(const char *str, int *psig) +{ + int sig; + + if (isdigit(*str)) + return pu_strtoi(str, psig, 0, INT_MAX); /* Or SIGRTMAX? 
*/ + + for (sig = 0; sig < ARRAY_SIZE(PU_SIGNAL_NAMES); sig++) { + const char *name = PU_SIGNAL_NAMES[sig]; + + if (name == NULL) + continue; + + if (strcasecmp(str, name) == 0 || + strcasecmp(str, name + 3) == 0) { + *psig = sig; + + return 0; + } + } + + return -EINVAL; +} + +static char *pu_str_escape2(const char *str, size_t count) +{ + const char *s; + char *esc, *e; + + esc = calloc(4 * count + 1, sizeof(esc[0])); + if (esc == NULL) + OOM(); + + e = esc; + s = str; + + while (s < str + count) { + int c = (unsigned char)*(s++); /* ctype and the shifts below need a non-negative byte value */ + + if (isprint(c) && strchr(" \t\n\v\f\r\\", c) == NULL) { /* plain printable byte: copy as is */ + *(e++) = c; + } else { /* whitespace, backslash or unprintable byte: emit \ooo */ + *(e++) = '\\'; + *(e++) = '0' + ((c >> 6) & 7); + *(e++) = '0' + ((c >> 3) & 7); + *(e++) = '0' + ((c >> 0) & 7); + } + } + + *(e++) = '\0'; + + assert(e <= esc + 4 * count + 1); + + return esc; +} + +static char *pu_str_unescape2(const char *esc, size_t count) +{ + const char *e; + char *str, *s; + + str = calloc(count + 1, sizeof(str[0])); + if (str == NULL) + OOM(); + + s = str; + e = esc; + + while (e < esc + count) { + int c = *(e++); + int o1, o2, o3; + + if (c != '\\') { + *(s++) = c; + } else if (e + 3 <= esc + count) { + o1 = *(e++); + o2 = *(e++); + o3 = *(e++); + *(s++) = ((o1 - '0') << 6) | ((o2 - '0') << 3) | ((o3 - '0') << 0); /* ASCII octal digits -> byte value */ + } else { + break; + } + } + + *(s++) = '\0'; + + assert(s <= str + count + 1); + + return str; +} + +static char *pu_str_unescape(const char *esc) +{ + return pu_str_unescape2(esc, strlen(esc)); +} + +static void pu_close(int *pfd) +{ + int rc; + + if (*pfd < 0) + return; + + rc = close(*pfd); + assert(rc == 0); + *pfd = -1; +} + +static void pu_fclose(FILE **pfile) +{ + int rc; + + if (*pfile == NULL) + return; + + rc = fclose(*pfile); + assert(rc == 0); + *pfile = NULL; +} + +static void pu_closedir(DIR **pdir) +{ + int rc; + + if (*pdir == NULL) + return; + + rc = closedir(*pdir); + assert(rc == 0); + *pdir = NULL; +} + +static int pu_opendir_at(int dirfd, const char *path, DIR **pdir, int *pfd) +{ + DIR *dir = NULL; + int fd = -1; + int fd2 = -1; 
+ int rc = -1; + + fd = openat(dirfd, path, O_RDONLY|O_DIRECTORY|O_NOFOLLOW); + if (fd < 0) { + rc = -errno; + goto out; + } + + fd2 = dup(fd); + if (fd2 < 0) { + rc = -errno; + goto out; + } + + dir = fdopendir(fd2); + if (dir == NULL) { + rc = -errno; + goto out; + } + + fd2 = -1; /* dir owns it now. */ + + *pdir = dir; + dir = NULL; + + *pfd = fd; + fd = -1; + + rc = 0; +out: + pu_closedir(&dir); + pu_close(&fd); + pu_close(&fd2); + + return rc; +} + +static int pu_readlink_at(int dirfd, const char *path, char **ptarget, bool escape) +{ + char *buf = NULL; + size_t buf_size; + ssize_t rc; + + for (buf_size = PATH_MAX; buf_size < 1024 * PATH_MAX; buf_size *= 2) { + buf = realloc(buf, buf_size); + if (buf == NULL) + OOM(); + + rc = readlinkat(dirfd, path, buf, buf_size); + if (rc < 0) { + rc = -errno; + goto out; + } + + if (rc == buf_size) + continue; /* Truncation or no room for trailing '\0' */ + + buf[rc] = '\0'; + + if (escape) { + *ptarget = pu_str_escape2(buf, rc); + } else { + *ptarget = buf; + buf = NULL; + } + + rc = 0; + goto out; + } + + rc = -ENAMETOOLONG; +out: + free(buf); + + return rc; +} + +/* Find a mountinfo entry by mount_id, return the mount point in *pmount_point */ +static int pu_get_mountinfo(int proc_dirfd, int mount_id, char **pmount_point) +{ + int fd = -1; + FILE *file = NULL; + char *line = NULL; + size_t line_size = 0; + int rc; + + fd = openat(proc_dirfd, "self/mountinfo", O_RDONLY); + if (fd < 0) { + rc = -errno; + PU_ERROR("cannot open '%s/%s': %s\n", + "/proc", "self/mountinfo", strerror(errno)); + goto out; + } + + file = fdopen(fd, "r"); + if (file == NULL) { + rc = -errno; + PU_ERROR("cannot open '%s/%s': %s\n", + "/proc", "self/mountinfo", strerror(errno)); + goto out; + } + + fd = -1; /* file owns it now. */ + + while (getline(&line, &line_size, file) != -1) { + char *str = line; + const char *mount_id_str; + const char *mount_point; + + /* 110 39 1273:181606 / /mnt/lustre rw ... 
*/ + + mount_id_str = strsep(&str, " "); + /* parent_id = */ strsep(&str, " "); + /* major:minor = */ strsep(&str, " "); + /* root = */ strsep(&str, " "); + mount_point = strsep(&str, " "); + + if (atoi(mount_id_str) != mount_id || + mount_point == NULL) + continue; + + *pmount_point = pu_str_unescape(mount_point); + rc = 0; + goto out; + } + + rc = -EINVAL; +out: + pu_fclose(&file); + pu_close(&fd); + free(line); + + return rc; +} + +struct pu_fid { + __u64 f_seq; + __u32 f_oid; + __u32 f_ver; +}; + +struct pu_lustre_nfs_fid { + struct pu_fid lnf_child; + struct pu_fid lnf_parent; +}; + +enum { + PU_MOUNT_ID_FAKE = INT_MAX, + PU_FILEID_LUSTRE = 0x97, /* From linux/exportfs.h */ + PU_FILEID_INVALID = 0xff, /* ... */ +}; + +static int pu_name_to_handle(int dirfd, const char *path, struct file_handle **phandle, int *mount_id, int flags) +{ + struct file_handle *handle = NULL; + int rc; + + /* + * This could also be done for open files by reading the + * mnt_id field from /proc/$pid/fdinfo/$fd. But this is + * faster and also works for file references like cwd, exe, + * root, and maps. For already referenced files (on Lustre at + * least) this is very cheap (no RPCs, no locks). See + * ll_encode_fh(). + * + * MAX_HANDLE_SZ == 128, so this is not huge. */ + handle = calloc(1, sizeof(*handle) + MAX_HANDLE_SZ); + if (handle == NULL) + OOM(); + + handle->handle_bytes = MAX_HANDLE_SZ; + + rc = name_to_handle_at(dirfd, path, handle, mount_id, flags); + if (rc == 0) + goto out; + + /* Some pseudo FS types (like /proc, /sys, /dev/pts, socket:, + * pipe:, ...) do not support name_to_handle_at(). We already + * know that the FS which we are unmounting supports + * name_to_handle_at() since we have its mount id. So + * EOPNOTSUPP means != to the mount id we want. Mask this by + * cooking up a fake mount id. 
*/ + if (errno == EOPNOTSUPP) { + handle->handle_type = PU_FILEID_INVALID; + handle->handle_bytes = 0; + *mount_id = PU_MOUNT_ID_FAKE; + rc = 0; + goto out; + } + + if (errno != EOVERFLOW) { + rc = -errno; + goto out; + } + + handle = realloc(handle, sizeof(*handle) + handle->handle_bytes); + if (handle == NULL) + OOM(); + + rc = name_to_handle_at(dirfd, path, handle, mount_id, flags); + if (rc < 0) { + rc = -errno; + goto out; + } + + rc = 0; +out: + if (rc == 0 && phandle != NULL) { + *phandle = handle; + handle = NULL; + } + + free(handle); + + return rc; +} + +static int pu_name_to_mount_id(int dirfd, const char *path, int *mount_id, int flags) +{ + return pu_name_to_handle(dirfd, path, NULL, mount_id, flags); +} + +static int pu_fd_to_mount_id(int fd, int *mount_id) +{ + return pu_name_to_handle(fd, "", NULL, mount_id, AT_EMPTY_PATH); +} + +struct pu_ctl { + pid_t puc_pid; + int puc_dirfd; /* /proc/$pid */ + int puc_err_count; + int puc_use_count; + bool puc_print; /* If so then print. If not then stop when we find first use. */ + char *puc_comm; /* Escaped comm, initialized on demand by puc_comm(). */ +}; + +static const char *puc_comm(struct pu_ctl *puc) +{ + const size_t PU_COMM_LEN_MAX = 15; + char buf[PU_COMM_LEN_MAX + 2]; + int comm_fd = -1; + ssize_t rc; + + if (puc->puc_comm != NULL) + goto out; + + comm_fd = openat(puc->puc_dirfd, "comm", O_RDONLY); + if (comm_fd < 0) + goto out; + + /* Expect up to 15 chars followed by a newline. */ + rc = read(comm_fd, buf, sizeof(buf)); + if (rc <= 1) + goto out; + + if (buf[rc - 1] == '\n') + rc--; + + puc->puc_comm = pu_str_escape2(buf, rc); +out: + pu_close(&comm_fd); + + return puc->puc_comm != NULL ? 
puc->puc_comm : "-"; +} + +static void puc_free(struct pu_ctl *puc) +{ + if (puc == NULL) + return; + + pu_close(&puc->puc_dirfd); + free(puc->puc_comm); + free(puc); +} + +static int puc_create(struct pu_ctl **ppuc, int proc_dirfd, pid_t pid, const char *pid_name) +{ + struct pu_ctl *puc = NULL; + int rc; + + puc = calloc(1, sizeof(*puc)); + if (puc == NULL) + OOM(); + + puc->puc_pid = pid; + + puc->puc_dirfd = openat(proc_dirfd, pid_name, O_RDONLY|O_DIRECTORY); + if (puc->puc_dirfd < 0) { + rc = (errno == ESRCH) ? -ENOENT : -errno; + goto out; + } + + /* Try to get the comm now in case we kill this process. */ + (void)puc_comm(puc); + + *ppuc = puc; + puc = NULL; + rc = 0; +out: + puc_free(puc); + + return rc; +} + +static const char *puc_handle_type_str(const struct file_handle *handle) +{ + switch (handle->handle_type) { + case PU_FILEID_LUSTRE: + return "lustre"; + default: + return "-"; + } +} + +static char *puc_handle_to_str(const struct file_handle *handle) +{ + const struct pu_lustre_nfs_fid *lnf; + const size_t str_size = 80; /* FID_NOBRACE_LEN == 40 */ + char *str; + + str = calloc(str_size, sizeof(str[0])); + if (str == NULL) + OOM(); + + str[0] = '-'; + + if (handle->handle_type != PU_FILEID_LUSTRE) + goto out; + + if (handle->handle_bytes != sizeof(*lnf)) + goto out; + + lnf = (const struct pu_lustre_nfs_fid *)(&handle->f_handle[0]); + snprintf(str, str_size, "[%#llx:0x%x:0x%x]", + (unsigned long long)lnf->lnf_child.f_seq, + (unsigned int)lnf->lnf_child.f_oid, + (unsigned int)lnf->lnf_child.f_ver); +out: + return str; +} + +/* dirfd+name is one of the magical /proc/$pid/{cwd,exe,root}, + * /proc/$pid/fd/[0-9]+, or /proc/$pid/map_files/[range] symlinks. + * + * Return 1 if we are done with process $pid. 
*/ +static int pu_check_link(struct pu_ctl *puc, int dirfd, const char *print_name, const char *name, int mount_id) +{ + struct file_handle *handle = NULL; + char *handle_str = NULL; + char *target_esc = NULL; + int target_mount_id = -1; + int rc; + + rc = pu_name_to_handle(dirfd, name, &handle, &target_mount_id, AT_SYMLINK_FOLLOW); + if (rc == -ENOENT || rc == -ESRCH) { + /* Some link targets do not exist for kernel tasks. This + * could also be a normal race. */ + rc = 0; + goto out; + } + + if (rc < 0) { + PU_ERROR("cannot get mount id for PID %d, link '%s': %s\n", + puc->puc_pid, name, strerror(-rc)); + puc->puc_err_count++; + goto out; + } + + if (target_mount_id != mount_id) { + rc = 0; + goto out; + } + + puc->puc_use_count++; + + if (puc->puc_print) { + handle_str = puc_handle_to_str(handle); + pu_readlink_at(dirfd, name, &target_esc, true); + + printf("%s\t%d\t%s\t%s\t%s\t%s\n", + puc_comm(puc), + puc->puc_pid, + print_name != NULL ? print_name : name, + puc_handle_type_str(handle), + handle_str, + target_esc != NULL ? target_esc : "-"); + + rc = 0; /* Since we're printing uses we need to find all uses. */ + } else { + rc = 1; /* Not printing so stop after finding first use. */ + } +out: + free(handle); + free(handle_str); + free(target_esc); + + return rc; +} + +/* Check /proc/[pid]/fd/[0-9]+ or /proc/[pid]/map_files/[range] symlinks for mount_id. + * pid_dirfd is /proc/$pid + * path is "fd" or "map_files" + */ +static int pu_check_links_dir(struct pu_ctl *puc, int pid_dirfd, const char *print_name, const char *path, int mount_id) +{ + DIR *links_dir = NULL; + int links_dirfd = -1; + struct dirent *d; + int rc; + + rc = pu_opendir_at(pid_dirfd, path, &links_dir, &links_dirfd); + if (rc < 0) /* ... normal races. 
*/ + goto out; + + while ((d = readdir(links_dir)) != NULL) { + if (d->d_type != DT_LNK) + continue; + + rc = pu_check_link(puc, links_dirfd, print_name, d->d_name, mount_id); + if (rc != 0) + goto out; + } +out: + pu_closedir(&links_dir); + pu_close(&links_dirfd); + + return rc; +} + +static int puc_check(struct pu_ctl *puc, int mount_id) +{ + int pid_dirfd = puc->puc_dirfd; /* /proc/$pid */ + int rc; + + /* Be mostly consistent with the file names used by lsof. */ + rc = pu_check_link(puc, pid_dirfd, "cwd", "cwd", mount_id); + if (rc != 0) + return rc; + + rc = pu_check_link(puc, pid_dirfd, "txt", "exe", mount_id); + if (rc != 0) + return rc; + + rc = pu_check_link(puc, pid_dirfd, "rtd", "root", mount_id); + if (rc != 0) + return rc; + + rc = pu_check_links_dir(puc, pid_dirfd, "map", "map_files", mount_id); + if (rc != 0) + return rc; + + rc = pu_check_links_dir(puc, pid_dirfd, NULL, "fd", mount_id); + if (rc != 0) + return rc; + + return 0; +} + +enum { + PU_OPT_DEBUG = 1, + PU_OPT_DRY_RUN, + PU_OPT_NO_OPEN, + PU_OPT_NO_SIGNAL, + PU_OPT_NO_UMOUNT, + PU_OPT_PRINT, + PU_OPT_SCAN_COUNT, + PU_OPT_SIGNAL, + PU_OPT_VERSION, +}; + +static struct option options[] = { + { "debug", no_argument, NULL, PU_OPT_DEBUG }, + { "dry-run", no_argument, NULL, PU_OPT_DRY_RUN }, + { "force", no_argument, NULL, 'f' }, + { "help", no_argument, NULL, 'h' }, + { "lazy", no_argument, NULL, 'l' }, /* Accepted for compatability. */ + { "no-open", no_argument, NULL, PU_OPT_NO_OPEN }, + { "no-signal", no_argument, NULL, PU_OPT_NO_SIGNAL }, + { "no-umount", no_argument, NULL, PU_OPT_NO_UMOUNT }, + { "print", no_argument, NULL, PU_OPT_PRINT }, + { "scan-count", required_argument, NULL, PU_OPT_SCAN_COUNT }, + { "signal", required_argument, NULL, PU_OPT_SIGNAL }, + { "version", no_argument, NULL, PU_OPT_VERSION }, + { NULL }, +}; + +void usage(void) +{ + printf( +"Usage pumount [OPTION]... 
DIRECTORY\n" +"Lazily unmount the filesystem (FS) mounted at DIRECTORY and signal any\n" +"processes still using FS.\n" +"\n" +"Mandatory arguments to long options are mandatory for short options too.\n" +" --debug enable debugging oupput\n" +" --dry-run equivalent to '--no-signal --no-umount --print'\n" +" -f, --force force unmount FS\n" +" --h, --help print this help message and exit\n" +" --no-open do not open DIRECTORY\n" +" --no-signal do not send signals\n" +" --no-umount do not unmount FS\n" +" -p, --print print users of FS\n" +" --scan-count=COUNT scan for users at most COUNT times\n" /* ... (default %d) */ +" --signal=SIG send SIG to users of FS (default SIGKILL)\n" +" --version print version information and exit\n" +"\n" +"Operation:\n" +"1. Open '/proc' for scanning.\n" +"2. Open DIRECTORY and use name_to_handle_at() to get mount id of FS\n" +" (unless --no-open).\n" +"3. Start a lazy unmount of DIRECTORY (unless --no-mount).\n" +"4. Scan for '/proc' for processed using FS. For each process directory\n" +" ('/proc/[pid]') other than its own, pumount will:\n" +" a. Open the process directory.\n" +" b. Determine if the process is using any files belonging to FS.\n" +"\n" +" pumount detects files belonging to FS by calling\n" +" name_to_handle_at() on the magical procfs symlinks\n" +" (proc/[pid]/{exe,cwd,root}, /proc/[pid]/fd/[fd] and\n" +" /proc/[pid]/map_files/[range] to get the mount id of the\n" +" file. Note that pumount does not access the files used by\n" +" the process. In particular, and unlike lsof, pumount does not\n" +" stat files.\n" +"\n" +" If printing (--print) is enabled then pumount prints the uses\n" +" of files belonging to FS in an lsof-like format. In this case\n" +" readlinkat() will be called on the procfs symlinks of used\n" +" files to expand their paths.\n" +"\n" +" c. If the process is using FS then send SIG to it (unless --no-signal).\n" +" d. Close the process directory.\n" +"\n" +"5. 
Repeat the scan (step 4) at most COUNT times, stopping if a scan\n" +" completed with no uses of FS detected.\n" +"6. Close the DIRECTORY file descriptor.\n" +"7. Close the '/proc' file descriptor.\n" +"\n" +"Notes:\n" +" pumount avoids mount id reuse races by holding DIRECTORY open until\n" +" step 6. Similarly, pumount avoids PID reeuse races by holding the\n" +" process directory open across 4b and 4c.\n" +"\n" +" When DIRECTORY is a Lustre client mount point, no RPCs will be sent\n" +" and no LDLM locks will be acquired on behalf of the actions in 4b.\n" +"\n" +"Examples:\n" +"Unmount the client mounted at /lustre/fs0a12/client/ and kill remaining\n" +"processes using it:\n" +"# pumount /lustre/fs0a12/client/\n" +"\n" +"Print the processes using /lustre/fs0a12/client:\n" +"# pumount --dry-run /lustre/fs0a12/client/\n" +"bash 8142 cwd lustre [0x200000404:0x5:0x0] /lustre/fs0a12/client/home/eggbert\n" +"eggrep 8446 cwd lustre [0x200000404:0x5:0x0] /lustre/fs0a12/client/home/eggbert\n" +"eggrep 8446 txt lustre [0x200000404:0x7:0x0] /lustre/fs0a12/client/home/eggbert/eggrep\n" +"eggrep 8446 map lustre [0x200000404:0x7:0x0] /lustre/fs0a12/client/home/eggbert/eggrep\n" +"eggrep 8446 0 lustre [0x200000404:0x8:0x0] /lustre/fs0a12/client/home/eggbert/EGGDATA.csv\n" +"eggrep 8446 1 lustre [0x200000404:0x6:0x0] /lustre/fs0a12/client/home/eggbert/results.txt\n" +"eggrep 8446 2 lustre [0x200000404:0x9:0x0] /lustre/fs0a12/client/home/eggbert/results.err\n" +"python 8942 0 lustre [0x200000404:0x3:0x0] /lustre/fs0a12/client/projects/EGGS\\040V7\\040FINAL.xls\n" +"\n" +"Exit status:\n" +" 0 Unmount succeeded and no users remained.\n" +" 1 Unmount failed or users remained or other error.\n" +" 2 An invalid option or argument was supplied.\n"); +/* TODO (maybe) add more statuses about users remaining, minor errors. ... 
*/ +} + +int main(int argc, char *argv[]) +{ + pid_t pu_pid = -1; + const char *mount_point = NULL; + char *mount_point_canon = NULL; + char *mount_point_info = NULL; + int mount_dirfd = -1; + int mount_id = -1; + int proc_dirfd = -1; + DIR *proc_dir = NULL; + bool do_open = true; + bool do_print = false; + bool do_signal = true; + bool do_umount = true; + int sig = SIGKILL; + int umount2_flags = MNT_DETACH; + int proc_count; + int scan_count_max = 4; + int scan_count; + int use_count = 0; + int err_count = 0; + int status; + int rc; + int c; + + while ((c = getopt_long(argc, argv, "fhl", options, NULL)) != EOF) { + switch (c) { + case PU_OPT_DEBUG: + pu_debug = true; + break; + case PU_OPT_DRY_RUN: + do_print = true; + do_signal = false; + do_umount = false; + scan_count_max = 1; + break; + case 'f': + umount2_flags |= MNT_FORCE; + break; + case 'h': + usage(); + exit(EXIT_SUCCESS); + case 'l': + /* Already implied. */ + break; + case PU_OPT_NO_OPEN: + do_open = false; + break; + case PU_OPT_NO_SIGNAL: + do_signal = false; + break; + case PU_OPT_NO_UMOUNT: + do_umount = false; + break; + case PU_OPT_PRINT: + do_print = true; + break; + case PU_OPT_SCAN_COUNT: + rc = pu_strtoi(optarg, &scan_count_max, 0, INT_MAX); + if (rc < 0) + PU_FATAL("invalid scan-count '%s'\n", optarg); + break; + case PU_OPT_SIGNAL: + rc = pu_str_to_signal(optarg, &sig); + if (rc < 0) + PU_FATAL("invalid signal '%s'\n", optarg); + break; + case PU_OPT_VERSION: + printf("%s %s\n", pu_program_name, pu_version); + exit(EXIT_SUCCESS); + case '?': + TRY_HELP_1(); + } + } + + if (optind + 1 != argc) { + TRY_HELP("usage: %s [OPTION]... 
DIRECTORY\n", + pu_program_name); + } + + mount_point = argv[optind]; + + PU_DEBUG_B(do_open); + PU_DEBUG_B(do_print); + PU_DEBUG_B(do_signal); + PU_DEBUG_B(do_umount); + PU_DEBUG_D(scan_count_max); + PU_DEBUG_D(sig); + PU_DEBUG_D(umount2_flags); + PU_DEBUG_S(mount_point); + + rc = pu_opendir_at(AT_FDCWD, "/proc", &proc_dir, &proc_dirfd); + if (rc < 0) + PU_FATAL("cannot open '%s': %s\n", "/proc", strerror(errno)); + + if (do_open) { + mount_dirfd = open(mount_point, O_RDONLY|O_DIRECTORY); + if (mount_dirfd < 0) + PU_FATAL("cannot open '%s': %s\n", mount_point, strerror(errno)); + + rc = pu_fd_to_mount_id(mount_dirfd, &mount_id); + if (rc < 0) + PU_FATAL("cannot get mount id for '%s': %s\n", mount_point, strerror(-rc)); + } else { + rc = pu_name_to_mount_id(AT_FDCWD, mount_point, &mount_id, AT_SYMLINK_FOLLOW); + if (rc < 0) + PU_FATAL("cannot get mount id for '%s': %s\n", mount_point, strerror(-rc)); + } + + PU_DEBUG_D(proc_dirfd); + PU_DEBUG_D(mount_dirfd); + PU_DEBUG_D(mount_id); + + if (mount_id == PU_MOUNT_ID_FAKE) + PU_FATAL("'%s' does not support mount id, try umount instead\n", + mount_point); + + if (do_umount) { + rc = umount2(mount_point, umount2_flags); + if (rc < 0) + PU_FATAL("cannot umount '%s' (id %d): %s\n", + mount_point, mount_id, + errno == EINVAL ? "not mounted" : strerror(errno)); + } else { + /* umount2() will validate mount_point for us. Unless + * we don't call it. So check if mount_point and the + * moint point from mountinfo refer to the same + * file. If we don't check and mount_point was not + * mounted then mount_id will probably refer to '/' + * and we'll try to kill almost every process. 
*/ + mount_point_canon = canonicalize_file_name(mount_point); + if (mount_point_canon == NULL) + PU_FATAL("cannot canonicalize '%s': %s:\n", + mount_point, strerror(errno)); + + rc = pu_get_mountinfo(proc_dirfd, mount_id, &mount_point_info); + if (rc < 0) + PU_FATAL("cannot find mountinfo entry for '%s' (id %d): %s:\n", + mount_point, mount_id, strerror(-rc)); + + PU_DEBUG_S(mount_point_canon); + PU_DEBUG_S(mount_point_info); + + /* It seems gross to comapre paths using strcmp() but + * this means we err on the side of safety. */ + if (strcmp(mount_point_canon, mount_point_info) != 0) + PU_FATAL("'%s' is not mounted\n", mount_point); + } + + pu_pid = getpid(); + PU_DEBUG_D(pu_pid); + + /* Scan /proc until we find no more uses of mount_id + * (proc_count == 0) or we reach scan_count_max. */ + for (proc_count = INT_MAX, scan_count = 0; + proc_count != 0 && scan_count < scan_count_max; + scan_count++) { + struct dirent *d; + + rewinddir(proc_dir); + proc_count = 0; + + while ((d = readdir(proc_dir)) != NULL) { + struct pu_ctl *puc = NULL; + pid_t pid; + + if (d->d_type != DT_DIR || !isdigit(d->d_name[0])) + goto next; + + rc = pu_strtoi(d->d_name, &pid, 1, INT_MAX); + if (rc < 0) { + PU_DEBUG("ignoring strange proc dirent '%s'\n", d->d_name); + goto next; + } + + if (pid == pu_pid) { + PU_DEBUG("ignoring own proc dirent '%s'\n", d->d_name); + goto next; + } + + rc = puc_create(&puc, proc_dirfd, pid, d->d_name); + if (rc < 0) { + if (rc != -ENOENT) { + PU_ERROR("cannot open '%s/%s': %s\n", + "/proc", d->d_name, strerror(-rc)); + err_count++; + } + + goto next; + } + + puc->puc_print = do_print; + + rc = puc_check(puc, mount_id); + if (rc < 0) + PU_ERROR("cannot get mount usage for process %d, comm '%s': %s\n", + pid, puc_comm(puc), strerror(-rc)); + + use_count += puc->puc_use_count; + err_count += puc->puc_err_count; + + if (puc->puc_use_count == 0) + goto next; + + PU_DEBUG("found process %d, comm '%s' with %d usesN of mount %d\n", + pid, puc_comm(puc), 
puc->puc_use_count, mount_id); + + proc_count += 1; + + if (!do_signal) + goto next; + + rc = kill(pid, sig); + PU_DEBUG("kill process %d, comm '%s', signal %d: rc = %d\n", + pid, puc_comm(puc), sig, rc < 0 ? -errno : 0); + + if (rc < 0 && errno != ESRCH) { + PU_ERROR("cannot kill process %d, comm '%s', signal %d: %s\n", + pid, puc_comm(puc), sig, strerror(errno)); + err_count++; + } + next: + puc_free(puc); + } + + PU_DEBUG_D(proc_count); + PU_DEBUG_D(scan_count); + PU_DEBUG_D(use_count); + PU_DEBUG_D(err_count); + + /* Give signaled processes a chance to exit. */ + sched_yield(); + } + + if (do_signal && proc_count != 0) { + PU_ERROR("%d process%s using '%s' (id %d) remained after %d scan%s\n", + proc_count, proc_count == 1 ? "" : "es", + mount_point, mount_id, + scan_count, scan_count == 1 ? "" : "s"); + status = EXIT_FAILURE; + } else { + status = EXIT_SUCCESS; + } + + /* If there are no other users of FS then the real umount + * should happen in the close of mount_dirfd. */ + pu_close(&mount_dirfd); + pu_closedir(&proc_dir); + pu_close(&proc_dirfd); + free(mount_point_canon); + free(mount_point_info); + + exit(status); +} -- 1.8.3.1