From: Fan Yong <fan.yong@intel.com>
Date: Tue, 21 Jun 2016 23:52:26 +0000 (+0800)
Subject: LU-8361 lfsck: detect Lustre device automatically
X-Git-Tag: 2.8.58~36
X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=a0f7174c4106104f45977eeec7338e8f7fd1dafa;p=fs%2Flustre-release.git

LU-8361 lfsck: detect Lustre device automatically

Originally, when starting, stopping, or querying LFSCK, the user
had to specify the Lustre device explicitly via the "-M" option.
Even if there was only a single Lustre device on the current server,
or the user wanted to start LFSCK on all devices with the "-A"
option, the "-M" option was still required. This requirement is
inconvenient. This patch enhances the LFSCK user interfaces to
allow the user to run the LFSCK commands without "-M" specified;
an available Lustre device on the current server is then selected
automatically. The "-M" option is still required in the following
cases: if there are multiple devices on the current server that
belong to different Lustre filesystems, or if the "-A" option is
not specified and there are multiple devices on the current server.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I291b958440b2409c93cdc8ef3a5e3fbe14885141
Reviewed-on: http://review.whamcloud.com/21596
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
---

diff --git a/lustre/doc/lctl-lfsck-query.8 b/lustre/doc/lctl-lfsck-query.8
index 9b97ff6..bf2c69b 100644
--- a/lustre/doc/lctl-lfsck-query.8
+++ b/lustre/doc/lctl-lfsck-query.8
@@ -1,11 +1,15 @@
 .TH lctl-lfsck-stop 8 "2016 Apr 1" Lustre "Lustre online fsck"
 .SH SYNOPSIS
 .br
-.B lctl lfsck_query \fR<-M | --device MDT_device> [-h | --help]
+.B lctl lfsck_query \fR[-M | --device MDT_device] [-h | --help]
             \fR[-t | --type lfsck_type[,lfsck_type...]] [-w | --wait]
 .br
 .SH DESCRIPTION
-Get the LFSCK global status via the specified MDT device.
+Get the LFSCK global status via the specified MDT device. If the "-M" option
+is not specified, an available Lustre device on the current server is
+selected automatically. But if there are multiple devices on the current
+server that belong to different Lustre filesystems, you need to specify the
+device (see "-M" option) explicitly.
 .SH OPTIONS
 .TP
 .B  -M, --device <MDT_device>
diff --git a/lustre/doc/lctl-lfsck-start.8 b/lustre/doc/lctl-lfsck-start.8
index b5cafda..b2aa2d7 100644
--- a/lustre/doc/lctl-lfsck-start.8
+++ b/lustre/doc/lctl-lfsck-start.8
@@ -1,7 +1,7 @@
 .TH lctl-lfsck-start 8 "2016 Apr 1" Lustre "Lustre online fsck"
 .SH SYNOPSIS
 .br
-.B lctl lfsck_start \fR<-M | --device [MDT,OST]_device>
+.B lctl lfsck_start \fR[-M | --device [MDT,OST]_device]
      \fR[-A | --all] [-c | --create-ostobj [on | off]]
      \fR[-C | --create-mttobj [on | off]]
      \fR[-e | --error <continue | abort>] [-h | --help]
@@ -15,11 +15,16 @@ Start LFSCK on the specified MDT or OST device with specified parameters.
 .SH OPTIONS
 .TP
 .B  -M, --device <MDT,OST_device>
-The MDT or OST device to start LFSCK/scrub on.
+The MDT or OST device to start LFSCK/scrub on. If the "-M" option is not
+specified, an available Lustre device on the current server is selected
+automatically. But in the following cases you need to specify the device
+(see "-M" option) explicitly: if there are multiple devices on the current
+server that belong to different Lustre filesystems, or if the "-A" option is
+not specified and there are multiple devices on the current server.
 .TP
 .B  -A, --all
-Start LFSCK on all nodes via the specified MDT device (see "-M" option) by
-single LFSCK command.
+Start LFSCK on all available devices in the system by a single LFSCK command.
+Such an LFSCK command can be executed on any MDT device in the system.
 .TP
 .B  -c, --create-ostobj [on | off]
 Create the lost OST-object for dangling LOV EA: 'off' (default) or 'on'. Under
diff --git a/lustre/doc/lctl-lfsck-stop.8 b/lustre/doc/lctl-lfsck-stop.8
index c3dd423..0cc4e59 100644
--- a/lustre/doc/lctl-lfsck-stop.8
+++ b/lustre/doc/lctl-lfsck-stop.8
@@ -1,18 +1,23 @@
 .TH lctl-lfsck-stop 8 "2016 Apr 1" Lustre "Lustre online fsck"
 .SH SYNOPSIS
 .br
-.B lctl lfsck_stop  \fR<-M | --device [MDT,OST]_device> [-A | --all] [-h | --help]
+.B lctl lfsck_stop  \fR[-M | --device [MDT,OST]_device] [-A | --all] [-h | --help]
 .br
 .SH DESCRIPTION
 Stop LFSCK on the specified MDT or OST device.
 .SH OPTIONS
 .TP
 .B  -M, --device <[MDT,OST]_device>
-The MDT or OST device to stop LFSCK/scrub on.
+The MDT or OST device to stop LFSCK/scrub on. If the "-M" option is not
+specified, an available Lustre device on the current server is selected
+automatically. But in the following cases you need to specify the device
+(see "-M" option) explicitly: if there are multiple devices on the current
+server that belong to different Lustre filesystems, or if the "-A" option is
+not specified and there are multiple devices on the current server.
 .TP
 .B  -A, --all
-Stop LFSCK on all nodes via the specified MDT device (see "-M" option) by
-single LFSCK command.
+Stop LFSCK on all available devices in the system by a single LFSCK command.
+Such an LFSCK command can be executed on any MDT device in the system.
 .TP
 .B  -h, --help
 Show this help.
diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh
index 89b4274..8eca8e8 100644
--- a/lustre/tests/sanity-lfsck.sh
+++ b/lustre/tests/sanity-lfsck.sh
@@ -1382,9 +1382,9 @@ test_11b() {
 }
 run_test 11b "LFSCK can rebuild crashed last_id"
 
-test_12() {
+test_12a() {
 	[ $MDSCOUNT -lt 2 ] &&
-		skip "We need at least 2 MDSes for test_12" && return
+		skip "We need at least 2 MDSes for test_12a" && return
 
 	check_mount_and_prep
 	for k in $(seq $MDSCOUNT); do
@@ -1447,7 +1447,30 @@ test_12() {
 
 	stop_full_debug_logging
 }
-run_test 12 "single command to trigger LFSCK on all devices"
+run_test 12a "single command to trigger LFSCK on all devices"
+
+test_12b() {
+	# Check that lfsck_start can pick the local device without '-M'.
+	check_mount_and_prep
+
+	echo "Start LFSCK without '-M' specified."
+	do_facet mds1 $LCTL lfsck_start -A -r ||
+		error "(0) Fail to start LFSCK without '-M'"
+
+	wait_all_targets_blocked namespace completed 1
+	wait_all_targets_blocked layout completed 2
+
+	local count=$(do_facet mds1 $LCTL dl | awk '$3 ~ /mdt/' | wc -l)
+	if [ $count -gt 1 ]; then
+		echo
+		echo "Start layout LFSCK on the node with multiple targets,"
+		echo "but not specify '-M'/'-A' option. Should get failure."
+		echo
+		do_facet mds1 $LCTL lfsck_start -t layout -r &&
+			error "(3) Start layout LFSCK should fail" || true
+	fi
+}
+run_test 12b "auto detect Lustre device"
 
 test_13() {
 	echo "#####"
diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c
index 4d00475..6d9a3a2 100644
--- a/lustre/utils/lctl.c
+++ b/lustre/utils/lctl.c
@@ -412,7 +412,7 @@ command_t cmdlist[] = {
 	/* LFSCK commands */
 	{"==== LFSCK ====", jt_noop, 0, "LFSCK"},
 	{"lfsck_start", jt_lfsck_start, 0, "start LFSCK\n"
-	 "usage: lfsck_start <-M | --device [MDT,OST]_device>\n"
+	 "usage: lfsck_start [-M | --device [MDT,OST]_device]\n"
 	 "		     [-A | --all] [-c | --create-ostobj [on | off]]\n"
 	 "		     [-C | --create-mdtobj [on | off]]\n"
 	 "		     [-e | --error {continue | abort}] [-h | --help]\n"
@@ -421,10 +421,10 @@ command_t cmdlist[] = {
 	 "		     [-t | --type lfsck_type[,lfsck_type...]]\n"
 	 "		     [-w | --window-size size]"},
 	{"lfsck_stop", jt_lfsck_stop, 0, "stop lfsck(s)\n"
-	 "usage: lfsck_stop <-M | --device [MDT,OST]_device>\n"
+	 "usage: lfsck_stop [-M | --device [MDT,OST]_device]\n"
 	 "		    [-A | --all] [-h | --help]"},
 	{"lfsck_query", jt_lfsck_query, 0, "check lfsck(s) status\n"
-	 "usage: lfsck_query <-M | --device MDT_device> [-h | --help]\n"
+	 "usage: lfsck_query [-M | --device MDT_device] [-h | --help]\n"
 	 "		     [-t | --type lfsck_type[,lfsck_type...]]\n"
 	 "		     [-w | --wait]"},
 
diff --git a/lustre/utils/lustre_lfsck.c b/lustre/utils/lustre_lfsck.c
index 7937ca1..2de436c 100644
--- a/lustre/utils/lustre_lfsck.c
+++ b/lustre/utils/lustre_lfsck.c
@@ -46,6 +46,7 @@
 #include <lustre_ioctl.h>
 /* Needs to be last to avoid clashes */
 #include <libcfs/util/ioctl.h>
+#include <libcfs/util/param.h>
 
 static struct option long_opt_start[] = {
 	{"device",		required_argument, 0, 'M'},
@@ -122,7 +123,7 @@ static void usage_start(void)
 {
 	fprintf(stderr, "start LFSCK\n"
 		"usage:\n"
-		"lfsck_start <-M | --device {MDT,OST}_device>\n"
+		"lfsck_start [-M | --device {MDT,OST}_device]\n"
 		"	     [-A | --all] [-c | --create_ostobj [on | off]]\n"
 		"	     [-C | --create_mdtobj [on | off]]\n"
 		"	     [-e | --error {continue | abort}] [-h | --help]\n"
@@ -154,7 +155,7 @@ static void usage_stop(void)
 {
 	fprintf(stderr, "stop LFSCK\n"
 		"usage:\n"
-		"lfsck_stop <-M | --device {MDT,OST}_device>\n"
+		"lfsck_stop [-M | --device {MDT,OST}_device]\n"
 		"           [-A | --all] [-h | --help]\n"
 		"options:\n"
 		"-M: device to stop LFSCK/scrub on\n"
@@ -167,7 +168,7 @@ static void usage_query(void)
 {
 	fprintf(stderr, "check the LFSCK global status\n"
 		"usage:\n"
-		"lfsck_query <-M | --device MDT_device> [-h | --help]\n"
+		"lfsck_query [-M | --device MDT_device] [-h | --help]\n"
 		"            [-t | --type check_type[,check_type...]]\n"
 		"            [-t | --wait]\n"
 		"options:\n"
@@ -194,6 +195,74 @@ static int lfsck_pack_dev(struct obd_ioctl_data *data, char *device, char *arg)
 	return 0;
 }
 
+/*
+ * Choose the Lustre device automatically when '-M' is not specified.
+ * MDT (mdd) parameter paths are searched first; OST (obdfilter) paths are
+ * only tried if that search fails with ENOENT, multiple_devices is false
+ * and types does not include LFSCK_TYPE_NAMESPACE. Several matches are
+ * accepted only if multiple_devices is true and all paths share the same
+ * prefix up to the last '-'; the first match is then packed into data.
+ * Returns 0 on success, -EINVAL if the choice is ambiguous, otherwise the
+ * error from the parameter lookup or from lfsck_pack_dev().
+ */
+static int lfsck_get_dev_name(struct obd_ioctl_data *data, char *device,
+			      int types, bool multiple_devices)
+{
+	glob_t param = { 0 };
+	char *ptr;
+	size_t i;
+	int rc;
+
+	rc = cfs_get_param_paths(&param, "mdd/*-MDT*");
+	if (rc && !multiple_devices && errno == ENOENT &&
+	    !(types & LFSCK_TYPE_NAMESPACE))
+		rc = cfs_get_param_paths(&param, "obdfilter/*-OST*");
+	if (rc) {
+		fprintf(stderr, "Fail to get device name: rc = %d.\n"
+			"You can specify the device explicitly "
+			"via '-M' option.\n", rc);
+		return rc;
+	}
+
+	if (param.gl_pathc == 1)
+		goto pack;
+
+	if (!multiple_devices) {
+		fprintf(stderr,
+			"Detect multiple devices on current node. "
+			"Please specify the device explicitly "
+			"via '-M' option or '-A' option for all.\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* Everything before the last '-' must be identical for all paths. */
+	ptr = strrchr(param.gl_pathv[0], '-');
+	LASSERT(ptr != NULL);
+	for (i = 1; i < param.gl_pathc; i++) {
+		char *ptr2 = strrchr(param.gl_pathv[i], '-');
+
+		LASSERT(ptr2 != NULL);
+		if ((ptr - param.gl_pathv[0]) != (ptr2 - param.gl_pathv[i]) ||
+		    strncmp(param.gl_pathv[0], param.gl_pathv[i],
+			    (ptr - param.gl_pathv[0])) != 0) {
+			fprintf(stderr,
+				"Detect multiple filesystems on current node. "
+				"Please specify the device explicitly "
+				"via '-M' option.\n");
+			rc = -EINVAL;
+			goto out;
+		}
+	}
+
+pack:
+	rc = lfsck_pack_dev(data, device, basename(param.gl_pathv[0]));
+out:
+	cfs_free_param_data(&param);
+
+	return rc;
+}
+
 int jt_lfsck_start(int argc, char **argv)
 {
 	struct obd_ioctl_data data;
@@ -335,15 +404,10 @@ bad_type:
 		start.ls_active = LFSCK_TYPES_DEF;
 
 	if (data.ioc_inlbuf4 == NULL) {
-		if (lcfg_get_devname() != NULL) {
-			rc = lfsck_pack_dev(&data, device, lcfg_get_devname());
-			if (rc != 0)
-				return rc;
-		} else {
-			fprintf(stderr,
-				"Must specify device to start LFSCK.\n");
-			return -EINVAL;
-		}
+		rc = lfsck_get_dev_name(&data, device, start.ls_active,
+					start.ls_flags & LPF_ALL_TGT);
+		if (rc != 0)
+			return rc;
 	}
 
 	data.ioc_inlbuf1 = (char *)&start;
@@ -413,15 +477,10 @@ int jt_lfsck_stop(int argc, char **argv)
 	}
 
 	if (data.ioc_inlbuf4 == NULL) {
-		if (lcfg_get_devname() != NULL) {
-			rc = lfsck_pack_dev(&data, device, lcfg_get_devname());
-			if (rc != 0)
-				return rc;
-		} else {
-			fprintf(stderr,
-				"Must specify device to stop LFSCK.\n");
-			return -EINVAL;
-		}
+		rc = lfsck_get_dev_name(&data, device, 0,
+					stop.ls_flags & LPF_ALL_TGT);
+		if (rc != 0)
+			return rc;
 	}
 
 	data.ioc_inlbuf1 = (char *)&stop;
@@ -496,15 +555,9 @@ bad_type:
 	}
 
 	if (data.ioc_inlbuf4 == NULL) {
-		if (lcfg_get_devname() != NULL) {
-			rc = lfsck_pack_dev(&data, device, lcfg_get_devname());
-			if (rc != 0)
-				return rc;
-		} else {
-			fprintf(stderr,
-				"Must specify device to query LFSCK.\n");
-			return -EINVAL;
-		}
+		rc = lfsck_get_dev_name(&data, device, 0, true);
+		if (rc != 0)
+			return rc;
 	}
 
 	data.ioc_inlbuf1 = (char *)&query;