From 1121816c4a4e1bb2ef097c4a9802362181c43800 Mon Sep 17 00:00:00 2001 From: Stephane Thiell Date: Mon, 8 Feb 2021 22:47:31 -0800 Subject: [PATCH] LU-7668 utils: add lctl del_ost Add helper command: lctl del_ost [--dryrun] --target fsname-OSTxxxx Permanently disable an OST by altering the client and MDT llog catalogs on MGS. The command finds all catalog records related to the specified OST and cancels them. A --dryrun option is provided so that the system administrator can see which records would have been cancelled, but without actually cancelling them. Signed-off-by: Stephane Thiell Change-Id: I58c4f10fa0f7164a40231e807698eb224cccf062 Reviewed-on: https://review.whamcloud.com/41449 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Jian Yu Reviewed-by: Oleg Drokin --- lustre/doc/Makefile.am | 1 + lustre/doc/lctl-del_ost.8 | 71 ++++++++++++ lustre/tests/conf-sanity.sh | 66 ++++++++++- lustre/utils/lctl.c | 3 + lustre/utils/obd.c | 267 ++++++++++++++++++++++++++++++++++++++++---- lustre/utils/obdctl.h | 1 + 6 files changed, 385 insertions(+), 24 deletions(-) create mode 100644 lustre/doc/lctl-del_ost.8 diff --git a/lustre/doc/Makefile.am b/lustre/doc/Makefile.am index 06c326f..0da96cd 100644 --- a/lustre/doc/Makefile.am +++ b/lustre/doc/Makefile.am @@ -198,6 +198,7 @@ SERVER_MANFILES = \ lctl-llog_catlist.8 \ lctl-llog_info.8 \ lctl-llog_print.8 \ + lctl-del_ost.8 \ lctl-nodemap-activate.8 \ lctl-nodemap-add-idmap.8 \ lctl-nodemap-add-range.8 \ diff --git a/lustre/doc/lctl-del_ost.8 b/lustre/doc/lctl-del_ost.8 new file mode 100644 index 0000000..e5fab71 --- /dev/null +++ b/lustre/doc/lctl-del_ost.8 @@ -0,0 +1,71 @@ +.TH lctl-del_ost 8 "2022-06-13" Lustre "configuration utilities" +.SH NAME +lctl del_ost \- permanently delete OST records +.SH SYNOPSIS +.B lctl del_ost +.RB "[ --dryrun ]" +.RB --target +.IR OST_LABEL + +.SH DESCRIPTION +.B lctl del_ost +cancels the config records for an OST specified by +.I OST_LABEL +to permanently forget
about it. It should be run on the MGS and will +search for the specified OST in the Lustre configuration log files for +all MDTs and clients. If +.RB --dryrun +is not specified, it will modify the configuration log files to +permanently disable the OST records. Newly mounted MDTs and clients +will not process deleted OSTs anymore. This method allows permanently +removing OSTs on a live filesystem without the use of --writeconf with +.BR tunefs.lustre (8). + +Before using this command, you probably want to migrate +file objects still on the OST by using +.BR lfs-migrate (1) +or +.BR lfs_migrate (1) +and make sure that there are no remaining file objects on the +OST by using +.BR lfs-find (1) +which can find files by OST index. +It is then recommended to first deactivate the OST using +.BR lctl (8) +with: +.TP +.B $ lctl conf_param lustre-OST0001.osc.active=0 +.TP +and finally use \fBlctl del_ost\fR to remove it from the configuration logs. +.TP +.SH OPTIONS +.TP +\fB\-n\fR, \fB\-\-dryrun\fR +An option provided so that the system administrator can see which +records would have been cancelled, but without actually cancelling them. +When deleting OST records, it is recommended to first use this option +and check that there are no errors. +.TP +\fB\-t\fR, \fB\-\-target\fR +This option is required and used to specify the OST label to permanently +delete from the records. The target label should be of the form +.BR "fsname-OST0a19" . +.TP +.SH EXAMPLES +.TP +.B $ lctl --device MGS del_ost --target lustre-OST0001 +.SH AVAILABILITY +.B lctl del_ost +is a subcommand of +.BR lctl (8) +and is distributed as part of the +.BR lustre (7) +filesystem package.
+.SH SEE ALSO +.BR lctl (8), +.BR lfs-find (1), +.BR lfs-migrate (1), +.BR lfs_migrate (1), +.BR lctl-llog_cancel (8), +.BR tunefs.lustre (8), +.BR lustre (7) diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index b7c8d30..80ade3e 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -2985,7 +2985,7 @@ test_33a() { # bug 12333, was test_33 --reformat $mgs_flag $mkfsoptions $fs2mdsdev $fs2mdsvdev || exit 10 add fs2ost $(mkfs_opts ost1 ${fs2ostdev}) --mgsnode=$MGSNID \ - --fsname=${FSNAME2} --index=8191 --reformat $fs2ostdev \ + --fsname=${FSNAME2} --index=0x1fff --reformat $fs2ostdev \ $fs2ostvdev || exit 10 start fs2mds $fs2mdsdev $MDS_MOUNT_OPTS && trap cleanup_fs2 EXIT INT @@ -3007,6 +3007,15 @@ test_33a() { # bug 12333, was test_33 error "$LFS getstripe $MOUNT2/hosts failed" umount $MOUNT2 + + # test lctl del_ost on large index + do_facet mgs "$LCTL del_ost -t ${FSNAME2}-OST1fff" || + error "del_ost failed with $?" + $MOUNT_CMD $MGSNID:/${FSNAME2} $MOUNT2 || error "$MOUNT_CMD failed" + echo "ok." + $LFS df | grep -q OST1fff && error "del_ost did not remove OST1fff!" + umount $MOUNT2 + stop fs2ost -f stop fs2mds -f cleanup_nocli || error "cleanup_nocli failed with $?" 
@@ -9531,6 +9540,61 @@ test_123ag() { # LU-15142
 }
 run_test 123ag "llog_print skips values deleted by set_param -P -d"
 
+test_123ah() { #LU-7668 del_ost
+	[ "$MGS_VERSION" -ge $(version_code 2.15.50) -a \
+	  "$MDS1_VERSION" -ge $(version_code 2.15.50) ] ||
+		skip "Need both MGS and MDS version at least 2.15.50"
+
+	[ -d $MOUNT/.lustre ] || setupall
+	stack_trap "do_facet mds1 $LCTL set_param fail_loc=0" EXIT
+
+	local cmd="--device MGS llog_print"
+
+	cli_llogcnt_orig=$(do_facet mgs $LCTL $cmd $FSNAME-client |
+			   grep -c $FSNAME-OST0000)
+	mds1_llogcnt_orig=$(do_facet mgs $LCTL $cmd $FSNAME-MDT0000 |
+			    grep -c $FSNAME-OST0000)
+
+	[ $cli_llogcnt_orig -gt 0 ] ||
+		error "$FSNAME-OST0000 not found (client)"
+	[ $mds1_llogcnt_orig -gt 0 ] || error "$FSNAME-OST0000 not found (MDT)"
+
+	# -n/--dryrun should NOT modify catalog
+	do_facet mgs "$LCTL del_ost -n -t $FSNAME-OST0000" ||
+		error "del_ost --dryrun failed with $?"
+
+	local cli_llogcnt=$(do_facet mgs $LCTL $cmd $FSNAME-client |
+			    grep -c $FSNAME-OST0000)
+	local mds1_llogcnt=$(do_facet mgs $LCTL $cmd $FSNAME-MDT0000 |
+			     grep -c $FSNAME-OST0000)
+
+	[ $cli_llogcnt -eq $cli_llogcnt_orig ] ||
+		error "--dryrun error: $cli_llogcnt != $cli_llogcnt_orig"
+	[ $mds1_llogcnt -eq $mds1_llogcnt_orig ] ||
+		error "--dryrun error: $mds1_llogcnt != $mds1_llogcnt_orig"
+
+	# actual run
+	do_facet mgs "$LCTL del_ost --target $FSNAME-OST0000" ||
+		error "del_ost failed with $?"
+
+	local cli_llogcnt=$(do_facet mgs $LCTL $cmd $FSNAME-client |
+			    grep -c $FSNAME-OST0000)
+	local mds1_llogcnt=$(do_facet mgs $LCTL $cmd $FSNAME-MDT0000 |
+			     grep -c $FSNAME-OST0000)
+
+	# every catalog entry for OST0000 should have been cancelled
+	[ $cli_llogcnt -eq 0 ] || error "$FSNAME-OST0000 not cancelled (cli)"
+	[ $mds1_llogcnt -eq 0 ] || error "$FSNAME-OST0000 not cancelled (MDT)"
+
+	umount_client $MOUNT
+	mount_client $MOUNT
+
+	$LFS df | grep -q OST0000 && error "del_ost did not remove OST0000!"
+	cleanup
+	reformat_and_config
+}
+run_test 123ah "del_ost cancels config log entries correctly"
+
 test_123F() {
 	remote_mgs_nodsh && skip "remote MGS with nodsh"
 
diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c
index 600ccf5..1765661 100644
--- a/lustre/utils/lctl.c
+++ b/lustre/utils/lctl.c
@@ -236,6 +236,9 @@ command_t cmdlist[] = {
 	 "respectively.\n"
 	 " -D Only list directories.\n"
 	 " -R Recursively list all parameters under the specified path.\n"},
+	{"del_ost", jt_del_ost, 0, "permanently delete OST records\n"
+	 "usage: del_ost [--dryrun] --target <$fsname-OSTxxxx>\n"
+	 "Cancel the config records for a specific OST to forget about it.\n"},
 
 	/* Debug commands */
 	{"==== debugging control ====", NULL, 0, "debug"},
diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c
index 3f71f03..328c7df 100644
--- a/lustre/utils/obd.c
+++ b/lustre/utils/obd.c
@@ -2688,13 +2688,47 @@ static int llog_default_device(enum llog_default_dev_op op)
 	return rc;
 }
 
-int jt_llog_catlist(int argc, char **argv)
+/**
+ * Fetch one batch of the catalog list with OBD_IOC_CATLOGLIST
+ *
+ * \param index[in]	value sent to the ioctl in ioc_count
+ * \param buf[out]	ioctl buffer, zeroed and packed here before the call
+ * \param buflen[in]	size of \a buf
+ *
+ * \retval		ioc_count read back from \a buf after the ioctl
+ *			< 0 if error
+ */
+static int llog_catlist_next(int index, char *buf, size_t buflen)
 {
 	struct obd_ioctl_data data;
+	int rc;
+
+	memset(&data, 0, sizeof(data));
+	data.ioc_dev = cur_device;
+	data.ioc_inllen1 = buflen - __ALIGN_KERNEL(sizeof(data), 8);
+	data.ioc_count = index;
+	memset(buf, 0, buflen);
+	rc = llapi_ioctl_pack(&data, &buf, buflen);
+	if (rc < 0) {
+		/* decode rc itself, as llog_del_ost_cb() does */
+		fprintf(stderr, "error: invalid llapi_ioctl_pack: %s\n",
+			strerror(-rc));
+		return rc;
+	}
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_CATLOGLIST, buf);
+	if (rc < 0) {
+		fprintf(stderr, "OBD_IOC_CATLOGLIST failed: %s\n",
+			strerror(errno));
+		return rc;
+	}
+	return ((struct obd_ioctl_data *)buf)->ioc_count;
+}
+
+int jt_llog_catlist(int argc, char **argv)
+{
 	char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
 	char *tmp = NULL;
 	int start = 0;
-	int rc;
 
 	if (argc != 1)
 		return CMD_HELP;
@@ -2703,37 +2737,20 @@ int jt_llog_catlist(int argc, char **argv)
 		return CMD_INCOMPLETE;
 
 	do {
-		memset(&data, 0, sizeof(data));
-		data.ioc_dev = cur_device;
-		data.ioc_inllen1 = sizeof(rawbuf) -
-				   __ALIGN_KERNEL(sizeof(data), 8);
-		data.ioc_count = start;
-		memset(buf, 0, sizeof(rawbuf));
-		rc = llapi_ioctl_pack(&data, &buf, sizeof(rawbuf));
-		if (rc) {
-			fprintf(stderr, "error: %s: invalid ioctl\n",
-				jt_cmdname(argv[0]));
-			goto err;
-		}
-		rc = l_ioctl(OBD_DEV_ID, OBD_IOC_CATLOGLIST, buf);
-		if (rc < 0)
+		start = llog_catlist_next(start, rawbuf, sizeof(rawbuf));
+		if (start < 0)
 			break;
 		tmp = ((struct obd_ioctl_data *)buf)->ioc_bulk;
 		if (strlen(tmp) > 0)
 			fprintf(stdout, "%s", tmp);
 		else
 			break;
-		start = ((struct obd_ioctl_data *)buf)->ioc_count;
 	} while (start);
 
-	if (rc < 0)
-		fprintf(stderr, "OBD_IOC_CATLOGLIST failed: %s\n",
-			strerror(errno));
-
-err:
 	llog_default_device(LLOG_DFLT_DEV_RESET);
 
-	return rc;
+	/* a positive batch index left by an empty listing is not an error */
+	return start < 0 ? start : 0;
 }
 
 int jt_llog_info(int argc, char **argv)
@@ -3493,6 +3510,117 @@ static int llog_search_ost(char *logname, long last_index, char *ostname)
 	return (rc == 1 ? 1 : 0);
 }
 
+/* State shared between llog_del_ost() and llog_del_ost_cb() */
+struct llog_del_ost_priv {
+	char *logname;	/* config log being processed */
+	char *ostname;	/* OST label searched for in each record */
+	int found;	/* number of records cancelled so far */
+	int dryrun;	/* non-zero: only print what would be cancelled */
+};
+
+/**
+ * Callback to search and delete ostname in llog
+ *
+ * \param record[in]	pointer to llog record
+ * \param data[in]	pointer to struct llog_del_ost_priv
+ *
+ * \retval		0 if the record was skipped, cancelled or, in a dry
+ *			run, only printed
+ *			non-zero if packing or cancelling the record failed
+ */
+static int llog_del_ost_cb(const char *record, void *data)
+{
+	char ost_filter[MAX_STRING_SIZE] = {'\0'};
+	char log_idxstr[MAX_STRING_SIZE] = {'\0'};
+	long int log_idx = 0;
+	struct llog_del_ost_priv *priv = data;
+	char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
+	struct obd_ioctl_data ioc_data = { 0 };
+	int rc = 0;
+
+	/* an empty filter would match, and cancel, every record */
+	if (!priv->ostname || !priv->ostname[0])
+		return 0;
+
+	snprintf(ost_filter, sizeof(ost_filter), " %s", priv->ostname);
+	if (!strstr(record, ost_filter))
+		return 0;
+
+	/* sscanf() returns 0, not < 0, when the record has no index */
+	rc = sscanf(record, "- { index: %ld", &log_idx);
+	if (rc != 1) {
+		fprintf(stderr, "error: record without index:\n%s\n",
+			record);
+		return 0;
+	}
+	snprintf(log_idxstr, sizeof(log_idxstr), "%ld", log_idx);
+
+	ioc_data.ioc_dev = cur_device;
+	ioc_data.ioc_inllen1 = strlen(priv->logname) + 1;
+	ioc_data.ioc_inlbuf1 = priv->logname;
+	ioc_data.ioc_inllen3 = strlen(log_idxstr) + 1;
+	ioc_data.ioc_inlbuf3 = log_idxstr;
+
+	rc = llapi_ioctl_pack(&ioc_data, &buf, sizeof(rawbuf));
+	if (rc) {
+		fprintf(stderr, "ioctl_pack for catalog '%s' failed: %s\n",
+			ioc_data.ioc_inlbuf1, strerror(-rc));
+		return rc;
+	}
+
+	if (priv->dryrun) {
+		fprintf(stdout, "[DRY RUN] cancel catalog '%s:%s':\"%s\"\n",
+			ioc_data.ioc_inlbuf1, ioc_data.ioc_inlbuf3, record);
+	} else {
+		rc = l_ioctl(OBD_DEV_ID, OBD_IOC_LLOG_CANCEL, buf);
+		if (rc)
+			fprintf(stderr, "cancel catalog '%s:%s' failed: %s\n",
+				ioc_data.ioc_inlbuf1, ioc_data.ioc_inlbuf3,
+				strerror(errno));
+		else {
+			fprintf(stdout, "cancel catalog %s log_idx %ld: done\n",
+				priv->logname, log_idx);
+			priv->found++;
+		}
+	}
+	return rc;
+}
+
+/**
+ * Search and delete ost in llog
+ *
+ * \param logname[in]		pointer to config log name
+ * \param last_index[in]	the index of the last llog record
+ * \param ostname[in]		pointer to ost name
+ * \param dryrun[in]		dry run?
+ *
+ * \retval			0 if every batch of records was processed
+ *				else the value returned by jt_llog_print_iter()
+ */
+static int llog_del_ost(char *logname, long last_index, char *ostname,
+			int dryrun)
+{
+	long start, end, inc = MAX_IOC_BUFLEN / 128;
+	int rc = 0;
+	struct llog_del_ost_priv priv = { logname, ostname, 0, dryrun };
+
+	for (end = last_index; end > 1; end -= inc) {
+		start = end - inc > 0 ? end - inc : 1;
+		rc = jt_llog_print_iter(logname, start, end, llog_del_ost_cb,
+					&priv, true, false);
+		if (rc)
+			break;
+	}
+
+	if (priv.found)
+		fprintf(stdout, "del_ost: cancelled %d catalog entries\n",
+			priv.found);
+	else
+		fprintf(stdout, "del_ost: no catalog entry deleted\n");
+
+	return rc;
+}
+
 struct llog_pool_data {
 	char lpd_fsname[LUSTRE_MAXFSNAME + 1];
 	char lpd_poolname[LOV_MAXPOOLNAME + 1];
@@ -3974,6 +4102,149 @@ out:
 	return rc;
 }
 
+/**
+ * Cancel the config llog records of an OST (lctl del_ost)
+ *
+ * Walks the catalog list with llog_catlist_next() and runs llog_del_ost()
+ * on every config log whose name contains "<fsname>-MDT" or
+ * "<fsname>-client".
+ *
+ * \param argc[in]	number of entries in \a argv
+ * \param argv[in]	[-n|--dryrun] -t|--target <fsname>-OSTxxxx
+ *
+ * \retval		0 on success
+ *			CMD_HELP or CMD_INCOMPLETE on usage or device errors
+ *			any other non-zero value on failure
+ */
+int jt_del_ost(int argc, char **argv)
+{
+	char *fsname = NULL, *ptr, *logname;
+	char mdtpattern[16], clipattern[16];
+	char ostname[MAX_OBD_NAME + 1];
+	long last_index;
+	int rc, start = 0, dryrun = 0, nlogs = 0;
+	/* getopt_long() returns int: an unsigned char never equals -1 */
+	int c;
+
+	static struct option long_opts[] = {
+	{ .val = 'h', .name = "help", .has_arg = no_argument },
+	{ .val = 'n', .name = "dryrun", .has_arg = no_argument },
+	{ .val = 't', .name = "target", .has_arg = required_argument },
+	{ .name = NULL } };
+
+	while ((c = getopt_long(argc, argv, "hnt:", long_opts, NULL)) != -1) {
+		switch (c) {
+		case 't':
+			/* a repeated --target replaces the previous one */
+			free(fsname);
+			fsname = strdup(optarg);
+			if (!fsname)
+				return -ENOMEM;
+			break;
+		case 'n':
+			dryrun = 1;
+			break;
+		case 'h':
+		default:
+			free(fsname);
+			return CMD_HELP;
+		}
+	}
+
+	if (fsname == NULL)
+		return CMD_HELP;
+
+	if (llog_default_device(LLOG_DFLT_MGS_SET)) {
+		rc = CMD_INCOMPLETE;
+		goto out;
+	}
+
+	/* the label must be "<fsname>-OSTxxxx" with a non-empty fsname */
+	ptr = strstr(fsname, "-OST");
+	if (!ptr || ptr == fsname) {
+		rc = CMD_HELP;
+		goto err;
+	}
+
+	if (dryrun)
+		fprintf(stdout, "del_ost: dry run for target %s\n", fsname);
+
+	*ptr++ = '\0';
+	/*
+	 * Records are matched by substring, so insist on "OST" followed by
+	 * exactly four hex digits: "OST1" would also match OST1000..OST1fff.
+	 */
+	if (strspn(ptr + 3, "0123456789abcdefABCDEF") != 4 || ptr[7] != '\0') {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	/* mdtpattern and clipattern must not be truncated */
+	if (strlen(fsname) + sizeof("-client") > sizeof(clipattern)) {
+		rc = -E2BIG;
+		goto err;
+	}
+
+	snprintf(mdtpattern, sizeof(mdtpattern), "%s-MDT", fsname);
+	snprintf(clipattern, sizeof(clipattern), "%s-client", fsname);
+	snprintf(ostname, sizeof(ostname), "%s-%s", fsname, ptr);
+
+	rc = 0;
+	do {
+		char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
+		char *begin, *end;
+
+		start = llog_catlist_next(start, rawbuf, sizeof(rawbuf));
+		if (start < 0) {
+			rc = start;
+			break;
+		}
+		begin = ((struct obd_ioctl_data *)buf)->ioc_bulk;
+		if (strlen(begin) == 0)
+			break;
+
+		while ((end = strchr(begin, '\n'))) {
+			*end = '\0';
+			logname = strstr(begin, "config_log: ");
+
+			if (logname && (strstr(logname, mdtpattern) ||
+					strstr(logname, clipattern))) {
+				logname += 12;
+				nlogs++;
+
+				fprintf(stdout, "config_log: %s\n", logname);
+
+				last_index = llog_last_index(logname);
+				if (last_index < 0) {
+					fprintf(stderr,
+						"error with catalog %s: %s\n",
+						logname, strerror(-last_index));
+					/* keep the errno negative */
+					rc = last_index;
+					goto err;
+				}
+				rc = llog_del_ost(logname, last_index, ostname,
+						  dryrun);
+				if (rc < 0)
+					goto err;
+			}
+			begin = end + 1;
+		}
+	} while (start);
+
+	if (rc == 0 && nlogs == 0) {
+		fprintf(stderr, "del_ost: no config log found for '%s'\n",
+			fsname);
+		rc = -ENOENT;
+	}
+
+err:
+	llog_default_device(LLOG_DFLT_DEV_RESET);
+out:
+	free(fsname);
+	return rc;
+}
+
 #ifdef HAVE_SERVER_SUPPORT
 /**
  * Format and send the ioctl to the MGS.
diff --git a/lustre/utils/obdctl.h b/lustre/utils/obdctl.h
index 857b5da..5c42bf49 100644
--- a/lustre/utils/obdctl.h
+++ b/lustre/utils/obdctl.h
@@ -157,6 +157,7 @@ int jt_lcfg_setparam(int argc, char **argv);
 int jt_lcfg_listparam(int argc, char **argv);
 
 int jt_pool_cmd(int argc, char **argv);
+int jt_del_ost(int argc, char **argv);
 #ifdef HAVE_SERVER_SUPPORT
 int jt_barrier_freeze(int argc, char **argv);
 int jt_barrier_thaw(int argc, char **argv);
-- 
1.8.3.1