From: Fan Yong Date: Tue, 21 Jun 2016 23:52:26 +0000 (+0800) Subject: LU-8361 lfsck: detect Lustre device automatically X-Git-Tag: 2.8.58~36 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=a0f7174c4106104f45977eeec7338e8f7fd1dafa LU-8361 lfsck: detect Lustre device automatically Originally, when starting, stopping, or querying LFSCK, the user needed to specify the Lustre device explicitly via the "-M" option. Even if there was only a single Lustre device on the current server, or the user wanted to start the LFSCK on all devices with the "-A" option specified, the "-M" option was still required. Such a requirement is inconvenient. This patch enhances the LFSCK user interfaces to allow the user to run the LFSCK commands without "-M" specified. Instead, it will select the available Lustre device on the current server automatically. But in the following cases the "-M" option is still required: if there are multiple devices on the current server that belong to different Lustre filesystems, or if the "-A" option is not specified and there are multiple devices on the current server. Signed-off-by: Fan Yong Change-Id: I291b958440b2409c93cdc8ef3a5e3fbe14885141 Reviewed-on: http://review.whamcloud.com/21596 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Lai Siyao Reviewed-by: Oleg Drokin --- diff --git a/lustre/doc/lctl-lfsck-query.8 b/lustre/doc/lctl-lfsck-query.8 index 9b97ff6..bf2c69b 100644 --- a/lustre/doc/lctl-lfsck-query.8 +++ b/lustre/doc/lctl-lfsck-query.8 @@ -1,11 +1,15 @@ .TH lctl-lfsck-stop 8 "2016 Apr 1" Lustre "Lustre online fsck" .SH SYNOPSIS .br -.B lctl lfsck_query \fR<-M | --device MDT_device> [-h | --help] +.B lctl lfsck_query \fR[-M | --device MDT_device] [-h | --help] \fR[-t | --type lfsck_type[,lfsck_type...]] [-w | --wait] .br .SH DESCRIPTION -Get the LFSCK global status via the specified MDT device. +Get the LFSCK global status via the specified MDT device. 
If "-M" option +is not specified, it will select the available Lustre device on current +server automatically. But if there are multiple devices on current server +that belong to different Lustre filesystems, you need to specify the +device (see "-M" option) explicitly. .SH OPTIONS .TP .B -M, --device diff --git a/lustre/doc/lctl-lfsck-start.8 b/lustre/doc/lctl-lfsck-start.8 index b5cafda..b2aa2d7 100644 --- a/lustre/doc/lctl-lfsck-start.8 +++ b/lustre/doc/lctl-lfsck-start.8 @@ -1,7 +1,7 @@ .TH lctl-lfsck-start 8 "2016 Apr 1" Lustre "Lustre online fsck" .SH SYNOPSIS .br -.B lctl lfsck_start \fR<-M | --device [MDT,OST]_device> +.B lctl lfsck_start \fR[-M | --device [MDT,OST]_device] \fR[-A | --all] [-c | --create-ostobj [on | off]] \fR[-C | --create-mttobj [on | off]] \fR[-e | --error <continue | abort>] [-h | --help] @@ -15,11 +15,16 @@ Start LFSCK on the specified MDT or OST device with specified parameters. .SH OPTIONS .TP .B -M, --device -The MDT or OST device to start LFSCK/scrub on. +The MDT or OST device to start LFSCK/scrub on. If "-M" option is not specified, +it will select the available Lustre device on current server automatically. +But under the following cases you need to specify the device (see "-M" option) +explicitly: if there are multiple devices on current server that belong to +different Lustre filesystems, or if "-A" option is not specified and there are +multiple devices on current server. .TP .B -A, --all -Start LFSCK on all nodes via the specified MDT device (see "-M" option) by -single LFSCK command. +Start LFSCK on all available devices in the system by single LFSCK command. +Such LFSCK command can be executed on any MDT device in the system. .TP .B -c, --create-ostobj [on | off] Create the lost OST-object for dangling LOV EA: 'off' (default) or 'on'. 
Under diff --git a/lustre/doc/lctl-lfsck-stop.8 b/lustre/doc/lctl-lfsck-stop.8 index c3dd423..0cc4e59 100644 --- a/lustre/doc/lctl-lfsck-stop.8 +++ b/lustre/doc/lctl-lfsck-stop.8 @@ -1,18 +1,23 @@ .TH lctl-lfsck-stop 8 "2016 Apr 1" Lustre "Lustre online fsck" .SH SYNOPSIS .br -.B lctl lfsck_stop \fR<-M | --device [MDT,OST]_device> [-A | --all] [-h | --help] +.B lctl lfsck_stop \fR[-M | --device [MDT,OST]_device] [-A | --all] [-h | --help] .br .SH DESCRIPTION Stop LFSCK on the specified MDT or OST device. .SH OPTIONS .TP .B -M, --device <[MDT,OST]_device> -The MDT or OST device to stop LFSCK/scrub on. +The MDT or OST device to stop LFSCK/scrub on. If "-M" option is not specified, +it will select the available Lustre device on current server automatically. +But under the following cases you need to specify the device (see "-M" option) +explicitly: if there are multiple devices on current server that belong to +different Lustre filesystems, or if "-A" option is not specified and there are +multiple devices on current server. .TP .B -A, --all -Stop LFSCK on all nodes via the specified MDT device (see "-M" option) by -single LFSCK command. +Stop LFSCK on all available devices in the system by single LFSCK command. +Such LFSCK command can be executed on any MDT device in the system. .TP .B -h, --help Show this help. 
diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 89b4274..8eca8e8 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -1382,9 +1382,9 @@ test_11b() { } run_test 11b "LFSCK can rebuild crashed last_id" -test_12() { +test_12a() { [ $MDSCOUNT -lt 2 ] && - skip "We need at least 2 MDSes for test_12" && return + skip "We need at least 2 MDSes for test_12a" && return check_mount_and_prep for k in $(seq $MDSCOUNT); do @@ -1447,7 +1447,30 @@ test_12() { stop_full_debug_logging } -run_test 12 "single command to trigger LFSCK on all devices" +run_test 12a "single command to trigger LFSCK on all devices" + +test_12b() { + check_mount_and_prep + + echo "Start LFSCK without '-M' specified." + do_facet mds1 $LCTL lfsck_start -A -r || + error "(0) Fail to start LFSCK without '-M'" + + wait_all_targets_blocked namespace completed 1 + wait_all_targets_blocked layout completed 2 + + local count=$(do_facet mds1 $LCTL dl | + awk '{ print $3 }' | grep mdt | wc -l) + if [ $count -gt 1 ]; then + echo + echo "Start layout LFSCK on the node with multipe targets," + echo "but not specify '-M'/'-A' option. Should get failure." 
+ echo + do_facet mds1 $LCTL lfsck_start -t layout -r && + error "(3) Start layout LFSCK should fail" || true + fi +} +run_test 12b "auto detect Lustre device" test_13() { echo "#####" diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 4d00475..6d9a3a2 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -412,7 +412,7 @@ command_t cmdlist[] = { /* LFSCK commands */ {"==== LFSCK ====", jt_noop, 0, "LFSCK"}, {"lfsck_start", jt_lfsck_start, 0, "start LFSCK\n" - "usage: lfsck_start <-M | --device [MDT,OST]_device>\n" + "usage: lfsck_start [-M | --device [MDT,OST]_device]\n" " [-A | --all] [-c | --create-ostobj [on | off]]\n" " [-C | --create-mdtobj [on | off]]\n" " [-e | --error {continue | abort}] [-h | --help]\n" @@ -421,10 +421,10 @@ command_t cmdlist[] = { " [-t | --type lfsck_type[,lfsck_type...]]\n" " [-w | --window-size size]"}, {"lfsck_stop", jt_lfsck_stop, 0, "stop lfsck(s)\n" - "usage: lfsck_stop <-M | --device [MDT,OST]_device>\n" + "usage: lfsck_stop [-M | --device [MDT,OST]_device]\n" " [-A | --all] [-h | --help]"}, {"lfsck_query", jt_lfsck_query, 0, "check lfsck(s) status\n" - "usage: lfsck_query <-M | --device MDT_device> [-h | --help]\n" + "usage: lfsck_query [-M | --device MDT_device] [-h | --help]\n" " [-t | --type lfsck_type[,lfsck_type...]]\n" " [-w | --wait]"}, diff --git a/lustre/utils/lustre_lfsck.c b/lustre/utils/lustre_lfsck.c index 7937ca1..2de436c 100644 --- a/lustre/utils/lustre_lfsck.c +++ b/lustre/utils/lustre_lfsck.c @@ -46,6 +46,7 @@ #include /* Needs to be last to avoid clashes */ #include +#include static struct option long_opt_start[] = { {"device", required_argument, 0, 'M'}, @@ -122,7 +123,7 @@ static void usage_start(void) { fprintf(stderr, "start LFSCK\n" "usage:\n" - "lfsck_start <-M | --device {MDT,OST}_device>\n" + "lfsck_start [-M | --device {MDT,OST}_device]\n" " [-A | --all] [-c | --create_ostobj [on | off]]\n" " [-C | --create_mdtobj [on | off]]\n" " [-e | --error {continue | abort}] [-h | --help]\n" @@ 
-154,7 +155,7 @@ static void usage_stop(void) { fprintf(stderr, "stop LFSCK\n" "usage:\n" - "lfsck_stop <-M | --device {MDT,OST}_device>\n" + "lfsck_stop [-M | --device {MDT,OST}_device]\n" " [-A | --all] [-h | --help]\n" "options:\n" "-M: device to stop LFSCK/scrub on\n" @@ -167,7 +168,7 @@ static void usage_query(void) { fprintf(stderr, "check the LFSCK global status\n" "usage:\n" - "lfsck_query <-M | --device MDT_device> [-h | --help]\n" + "lfsck_query [-M | --device MDT_device] [-h | --help]\n" " [-t | --type check_type[,check_type...]]\n" " [-t | --wait]\n" "options:\n" @@ -194,6 +195,74 @@ static int lfsck_pack_dev(struct obd_ioctl_data *data, char *device, char *arg) return 0; } +static int lfsck_get_dev_name(struct obd_ioctl_data *data, char *device, + int types, bool multipe_devices) +{ + glob_t param = { 0 }; + char *ptr; + int rc; + int i; + + rc = cfs_get_param_paths(&param, "mdd/*-MDT*"); + if (rc) { + if (multipe_devices || errno != ENOENT || + types & LFSCK_TYPE_NAMESPACE) { + fprintf(stderr, "Fail to get device name: rc = %d\n." + "You can specify the device explicitly " + "via '-M' option.\n", rc); + return rc; + } + + rc = cfs_get_param_paths(&param, "obdfilter/*-OST*"); + if (rc) { + fprintf(stderr, "Fail to get device name: rc = %d\n." + "You can specify the device explicitly " + "via '-M' option.\n", rc); + return rc; + } + } + + if (param.gl_pathc == 1) + goto pack; + + if (!multipe_devices) { + fprintf(stderr, + "Detect multiple devices on current node. 
" + "Please specify the device explicitly " + "via '-M' option or '-A' option for all.\n"); + rc = -EINVAL; + goto out; + } + + ptr = strrchr(param.gl_pathv[0], '-'); + LASSERT(ptr != NULL); + + for (i = 1; i < param.gl_pathc; i++) { + char *ptr2 = strrchr(param.gl_pathv[i], '-'); + + LASSERT(ptr2 != NULL); + + if ((ptr - param.gl_pathv[0]) != (ptr2 - param.gl_pathv[i]) || + strncmp(param.gl_pathv[0], param.gl_pathv[i], + (ptr - param.gl_pathv[0])) != 0) { + fprintf(stderr, + "Detect multiple filesystems on current node. " + "Please specify the device explicitly " + "via '-M' option.\n"); + rc = -EINVAL; + goto out; + } + } + +pack: + rc = lfsck_pack_dev(data, device, basename(param.gl_pathv[0])); + +out: + cfs_free_param_data(&param); + + return rc; +} + int jt_lfsck_start(int argc, char **argv) { struct obd_ioctl_data data; @@ -335,15 +404,10 @@ bad_type: start.ls_active = LFSCK_TYPES_DEF; if (data.ioc_inlbuf4 == NULL) { - if (lcfg_get_devname() != NULL) { - rc = lfsck_pack_dev(&data, device, lcfg_get_devname()); - if (rc != 0) - return rc; - } else { - fprintf(stderr, - "Must specify device to start LFSCK.\n"); - return -EINVAL; - } + rc = lfsck_get_dev_name(&data, device, start.ls_active, + start.ls_flags & LPF_ALL_TGT); + if (rc != 0) + return rc; } data.ioc_inlbuf1 = (char *)&start; @@ -413,15 +477,10 @@ int jt_lfsck_stop(int argc, char **argv) } if (data.ioc_inlbuf4 == NULL) { - if (lcfg_get_devname() != NULL) { - rc = lfsck_pack_dev(&data, device, lcfg_get_devname()); - if (rc != 0) - return rc; - } else { - fprintf(stderr, - "Must specify device to stop LFSCK.\n"); - return -EINVAL; - } + rc = lfsck_get_dev_name(&data, device, 0, + stop.ls_flags & LPF_ALL_TGT); + if (rc != 0) + return rc; } data.ioc_inlbuf1 = (char *)&stop; @@ -496,15 +555,9 @@ bad_type: } if (data.ioc_inlbuf4 == NULL) { - if (lcfg_get_devname() != NULL) { - rc = lfsck_pack_dev(&data, device, lcfg_get_devname()); - if (rc != 0) - return rc; - } else { - fprintf(stderr, - "Must specify device 
to query LFSCK.\n"); - return -EINVAL; - } + rc = lfsck_get_dev_name(&data, device, 0, true); + if (rc != 0) + return rc; } data.ioc_inlbuf1 = (char *)&query;