From: Artem Blagodarenko Date: Thu, 16 Jul 2020 08:37:51 +0000 (-0400) Subject: LU-14090 mgs: no local logs flag X-Git-Tag: 2.14.52~185 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=f38f09e02a05c82718344ad86f80a4a0f399af9d;hp=2a34dc95bd100c181573e231047ff8976e296a36 LU-14090 mgs: no local logs flag There is a feature that starts a target with a local copy of the config log in order to avoid a delay in communicating with an MGS and to load MGS log updates later on. However, that feature is not always useful. When replace_nids adds records with new nids, it does not append to the remote config logs but overwrites the corresponding records in place. If a target starts using the local config log, it gets confused by the outdated nids. This patch adds a tunefs.lustre --nolocallogs key that sets the nolocallogs flag, which tells the target to ignore the local copy of the config logs. The flag is reset once new logs are downloaded from the MGS. tunefs.lustre --nolocallogs is suggested to be executed on targets together with replace_nids on MGS. HPE-bug-id: LUS-2510 Change-Id: I949c19ac701d287e1c1199bc12445989476a707b Signed-off-by: Artem Blagodarenko Reviewed-on: https://es-gerrit.dev.cray.com/157574 Reviewed-by: Vladimir Saveliev Reviewed-by: Nikitas Angelinas Tested-by: Alexander Lezhoev Reviewed-on: https://review.whamcloud.com/40448 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/doc/tunefs.lustre.8 b/lustre/doc/tunefs.lustre.8 index 3d8c6b6..bde1071 100644 --- a/lustre/doc/tunefs.lustre.8 +++ b/lustre/doc/tunefs.lustre.8 @@ -134,6 +134,19 @@ Correct order of operations is: .br * Mount clients .TP +.BI \--nolocallogs +Use configuration logs from the MGS, not local ones. +.br +There is a feature that starts a target with a local copy of +the config log in order to avoid a delay in communicating with +the MGS and to load MGS log updates later on. +However, that feature is not always useful. 
+.br +replace_nids changes config logs on the server side and local copies +become invalid. --nolocallogs sets the nolocallogs mount flag, +which causes the local copy of the config log to be ignored. +The flag is reset once new logs are downloaded from MGS. +.TP .BI \--quota Enable space accounting on old 2.x devices. diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index 3a071d4..4821985 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -109,6 +109,7 @@ struct lustre_mount_data { #define LMD_FLG_NO_PRECREATE 0x10000 /* do not allow OST object creation */ #define LMD_FLG_LOCAL_RECOV 0x20000 /* force recovery for local clients */ #define LMD_FLG_ABORT_RECOV_MDT 0x40000 /* Abort recovery between MDTs */ +#define LMD_FLG_NO_LOCAL_LOGS 0x80000 /* Use config logs from MGS */ #define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT) diff --git a/lustre/include/uapi/linux/lustre/lustre_disk.h b/lustre/include/uapi/linux/lustre/lustre_disk.h index e9cbf30..458eb3a 100644 --- a/lustre/include/uapi/linux/lustre/lustre_disk.h +++ b/lustre/include/uapi/linux/lustre/lustre_disk.h @@ -102,6 +102,8 @@ #define LDD_F_ERROR 0x4000 /** process at lctl conf_param */ #define LDD_F_PARAM2 0x8000 +/** the target shouldn't use local logs */ +#define LDD_F_NO_LOCAL_LOGS 0x10000 #define LDD_MAGIC 0x1dd00001 diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 0d06d75..ecce505 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -1924,21 +1924,32 @@ static int mgc_process_cfg_log(struct obd_device *mgc, cli->cl_mgc_configs_dir != NULL && lu2dt_dev(cli->cl_mgc_configs_dir->do_lu.lo_dev) == lsi->lsi_dt_dev) { - if (!local_only && !lsi->lsi_dt_dev->dd_rdonly) + if (!local_only && !lsi->lsi_dt_dev->dd_rdonly) { /* Only try to copy log if we have the lock. 
*/ + CDEBUG(D_INFO, "%s: copy local log %s\n", + mgc->obd_name, cld->cld_logname); + rc = mgc_llog_local_copy(env, mgc, ctxt, lctxt, cld->cld_logname); + if (!rc) + lsi->lsi_flags &= ~LDD_F_NO_LOCAL_LOGS; + } if (local_only || rc) { + if (unlikely(lsi->lsi_flags & LDD_F_NO_LOCAL_LOGS) + || rc) { + CWARN("%s: local log %s are not valid and/or remote logs are not accessbile rc = %d\n", + mgc->obd_name, cld->cld_logname, rc); + GOTO(out_pop, rc = -EIO); + } + if (strcmp(cld->cld_logname, PARAMS_FILENAME) != 0 && llog_is_empty(env, lctxt, cld->cld_logname)) { - LCONSOLE_ERROR_MSG(0x13a, "Failed to get MGS " - "log %s and no local copy." - "\n", cld->cld_logname); + LCONSOLE_ERROR_MSG(0x13a, "Failed to get MGS log %s and no local copy.\n", + cld->cld_logname); GOTO(out_pop, rc = -ENOENT); } - CDEBUG(D_MGC, "Failed to get MGS log %s, using local " - "copy for now, will try to update later.\n", - cld->cld_logname); + CDEBUG(D_MGC, "%s: Failed to get MGS log %s, using local copy for now, will try to update later.\n", + mgc->obd_name, cld->cld_logname); rc = 0; } /* Now, whether we copied or not, start using the local llog. 
diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 37165a2..be1521f 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -1395,6 +1395,9 @@ int lmd_parse(char *options, struct lustre_mount_data *lmd) } else if (strncmp(s1, "writeconf", 9) == 0) { lmd->lmd_flags |= LMD_FLG_WRITECONF; clear++; + } else if (strncmp(s1, "nolocallogs", 11) == 0) { + lmd->lmd_flags |= LMD_FLG_NO_LOCAL_LOGS; + clear++; } else if (strncmp(s1, "update", 6) == 0) { lmd->lmd_flags |= LMD_FLG_UPDATE; clear++; diff --git a/lustre/obdclass/obd_mount_server.c b/lustre/obdclass/obd_mount_server.c index acd9222..374ec8b 100644 --- a/lustre/obdclass/obd_mount_server.c +++ b/lustre/obdclass/obd_mount_server.c @@ -1553,6 +1553,8 @@ static int lsi_prepare(struct lustre_sb_info *lsi) */ lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_WRITECONF) ? LDD_F_WRITECONF : 0; + lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_NO_LOCAL_LOGS) ? + LDD_F_NO_LOCAL_LOGS : 0; lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_VIRGIN) ? LDD_F_VIRGIN : 0; lsi->lsi_flags |= (lsi->lsi_lmd->lmd_flags & LMD_FLG_UPDATE) ? diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 2e390d1..f540da2 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -9125,6 +9125,43 @@ test_127() { } run_test 127 "direct io overwrite on full ost" +test_128() +{ + combined_mgs_mds && skip "need separate mgs device" + [ "$ost2_FSTYPE" == zfs ] && import_zpool ost2 + + format_ost 2 + # Try to apply nolocallogs to the virgin OST. Should fail. 
+ do_facet ost2 "$TUNEFS --nolocallogs $(ostdevname 2)" && + error "nolocallogs should not be allowed on the virgin target" + + setupall + stopall + + [ "$ost1_FSTYPE" == zfs ] && import_zpool ost1 + # Start OST without MGS (local configs) + do_facet ost1 "$TUNEFS --dryrun $(ostdevname 1)" + start_ost || error "unable to start OST1" + stop_ost || error "Unable to stop OST1" + + [ "$ost1_FSTYPE" == zfs ] && import_zpool ost1 + # Do not allow reading local configs, should fail + do_facet ost1 "$TUNEFS --nolocallogs $(ostdevname 1)" || + error "Can not set nolocallogs" + start_ost && error "OST1 started, but should fail" + + # Connect to MGS successfully, reset nolocallogs flag + [ "$ost1_FSTYPE" == zfs ] && import_zpool ost1 + start_mgs || error "unable to start MGS" + start_ost || error "unable to start OST1" + + do_facet ost1 "$TUNEFS --dryrun $(ostdevname 1)" | grep "nolocallogs" && + error "nolocallogs expected to be reset" + + stop_ost || error "Unable to stop OST1" +} +run_test 128 "Force using remote logs with --nolocallogs" + if ! combined_mgs_mds ; then stop mgs fi diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index a366039..b1f0f58 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -156,6 +156,7 @@ void usage(FILE *out) "\t\t--erase-param : erase all instances of a parameter\n" "\t\t--erase-params: erase all old parameter settings\n" "\t\t--writeconf: erase all config logs for this fs.\n" + "\t\t--nolocallogs: use logs from MGS, not local ones.\n" "\t\t--quota: enable space accounting on old 2.x device.\n" "\t\t--rename: rename the filesystem name\n" #endif @@ -187,7 +188,7 @@ void print_ldd(char *str, struct mkfs_opts *mop) printf("Lustre FS: %s\n", ldd->ldd_fsname); printf("Mount type: %s\n", MT_STR(ldd)); printf("Flags: %#x\n", ldd->ldd_flags); - printf(" (%s%s%s%s%s%s%s%s)\n", + printf(" (%s%s%s%s%s%s%s%s%s)\n", IS_MDT(ldd) ? "MDT " : "", IS_OST(ldd) ? "OST " : "", IS_MGS(ldd) ? 
"MGS " : "", @@ -195,7 +196,8 @@ void print_ldd(char *str, struct mkfs_opts *mop) ldd->ldd_flags & LDD_F_VIRGIN ? "first_time " : "", ldd->ldd_flags & LDD_F_UPDATE ? "update " : "", ldd->ldd_flags & LDD_F_WRITECONF ? "writeconf " : "", - ldd->ldd_flags & LDD_F_NO_PRIMNODE ? "no_primnode " : ""); + ldd->ldd_flags & LDD_F_NO_PRIMNODE ? "no_primnode " : "", + ldd->ldd_flags & LDD_F_NO_LOCAL_LOGS ? "nolocallogs " : ""); printf("Persistent mount opts: %s\n", ldd->ldd_mount_opts); osd_print_ldd_params(mop); if (ldd->ldd_userdata[0]) @@ -228,10 +230,11 @@ static bool server_make_name(__u32 flags, __u16 index, const char *fs, if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) { if (!(flags & LDD_F_SV_ALL)) snprintf(name_buf, name_buf_size, "%.8s%c%s%04x", fs, - (flags & LDD_F_VIRGIN) ? ':' : - ((flags & LDD_F_WRITECONF) ? '=' : '-'), - (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST", - index); + (flags & LDD_F_VIRGIN) ? ':' : + ((flags & LDD_F_WRITECONF) ? '=' : + ((flags & LDD_F_NO_LOCAL_LOGS) ? '+' : '-')), + (flags & LDD_F_SV_TYPE_MDT) ? 
"MDT" : "OST", + index); } else if (flags & LDD_F_SV_TYPE_MGS) { snprintf(name_buf, name_buf_size, "MGS"); } else { @@ -399,6 +402,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, { .val = 'E', .name = "erase-param", .has_arg = required_argument}, { .val = 'e', .name = "erase-params", .has_arg = no_argument}, + { .val = 'l', .name = "nolocallogs", .has_arg = no_argument}, { .val = 'Q', .name = "quota", .has_arg = no_argument}, { .val = 'R', .name = "rename", .has_arg = optional_argument}, { .val = 'w', .name = "writeconf", .has_arg = no_argument}, @@ -408,7 +412,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, #ifndef TUNEFS "b:c:d:k:MOrR"; #else - "E:eQR::w"; + "E:elQR::w"; #endif struct lustre_disk_data *ldd = &mop->mo_ldd; char new_fsname[16] = { 0 }; @@ -737,6 +741,13 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, case 'w': ldd->ldd_flags |= LDD_F_WRITECONF; break; + case 'l': + if (ldd->ldd_flags & (LDD_F_VIRGIN | LDD_F_WRITECONF)) { + fprintf(stderr, "Can not apply nolocallogs to the target that was writeconfed or never been registered\n"); + return EINVAL; + } + ldd->ldd_flags |= LDD_F_NO_LOCAL_LOGS; + break; #endif /* !TUNEFS */ default: if (opt != '?') { @@ -870,7 +881,8 @@ int main(int argc, char *const argv[]) } strscpy(old_fsname, ldd->ldd_fsname, sizeof(ldd->ldd_fsname)); - ldd->ldd_flags &= ~(LDD_F_WRITECONF | LDD_F_VIRGIN); + ldd->ldd_flags &= ~(LDD_F_WRITECONF | LDD_F_VIRGIN | + LDD_F_NO_LOCAL_LOGS); /* svname of the form lustre:OST1234 means never registered */ ret = strlen(ldd->ldd_svname); @@ -880,6 +892,9 @@ int main(int argc, char *const argv[]) } else if (ldd->ldd_svname[ret - 8] == '=') { ldd->ldd_svname[ret - 8] = '-'; ldd->ldd_flags |= LDD_F_WRITECONF; + } else if (ldd->ldd_svname[ret - 8] == '+') { + ldd->ldd_svname[ret - 8] = '-'; + ldd->ldd_flags |= LDD_F_NO_LOCAL_LOGS; } if (strstr(ldd->ldd_params, PARAM_MGSNODE)) @@ -1007,8 +1022,11 @@ int main(int argc, char 
*const argv[]) goto out; } - if (check_mtab_entry(mop.mo_device, mop.mo_device, NULL, NULL)) + if (check_mtab_entry(mop.mo_device, mop.mo_device, NULL, NULL)) { + fprintf(stderr, "%s: is currently mounted, exiting without any change\n", + mop.mo_device); return EEXIST; + } /* Create the loopback file */ if (mop.mo_flags & MO_IS_LOOP) { @@ -1064,6 +1082,15 @@ int main(int argc, char *const argv[]) (void)osd_label_lustre(&opts); } + /* update svname with '+' to force remote logs */ + if (ldd->ldd_flags & LDD_F_NO_LOCAL_LOGS) { + struct mount_opts opts; + + opts.mo_ldd = *ldd; + opts.mo_source = mop.mo_device; + (void) osd_label_lustre(&opts); + } + /* Rename filesystem fsname */ if (mop.mo_flags & MO_RENAME) { ret = osd_rename_fsname(&mop, old_fsname); diff --git a/lustre/utils/mount_lustre.c b/lustre/utils/mount_lustre.c index 1c9c79b..7b363a5 100644 --- a/lustre/utils/mount_lustre.c +++ b/lustre/utils/mount_lustre.c @@ -560,7 +560,8 @@ static int parse_ldd(char *source, struct mount_opts *mop, clear_update_ondisk(source, ldd); /* Since we never rewrite ldd, ignore temp flags */ - ldd->ldd_flags &= ~(LDD_F_VIRGIN | LDD_F_WRITECONF); + ldd->ldd_flags &= ~(LDD_F_VIRGIN | LDD_F_WRITECONF | + LDD_F_NO_LOCAL_LOGS); /* This is to make sure default options go first */ temp_options = strdup(options); @@ -588,6 +589,9 @@ static int parse_ldd(char *source, struct mount_opts *mop, } else if (ldd->ldd_svname[rc - 8] == '=') { ldd->ldd_svname[rc - 8] = '-'; ldd->ldd_flags |= LDD_F_WRITECONF; + } else if (ldd->ldd_svname[rc - 8] == '+') { + ldd->ldd_svname[rc - 8] = '-'; + ldd->ldd_flags |= LDD_F_NO_LOCAL_LOGS; } } /* backend osd type */ @@ -634,6 +638,11 @@ static int parse_ldd(char *source, struct mount_opts *mop, if (rc != 0) return rc; } + if (ldd->ldd_flags & LDD_F_NO_LOCAL_LOGS) { + rc = append_option(options, options_len, "nolocallogs", NULL); + if (rc != 0) + return rc; + } if (ldd->ldd_flags & LDD_F_NO_PRIMNODE) { rc = append_option(options, options_len, "noprimnode", 
NULL); if (rc != 0) @@ -804,7 +813,8 @@ static void label_lustre(struct mount_opts *mop) if (mop->mo_nosvc) return; - if (mop->mo_ldd.ldd_flags & (LDD_F_VIRGIN | LDD_F_WRITECONF)) { + if (mop->mo_ldd.ldd_flags & (LDD_F_VIRGIN | LDD_F_WRITECONF | + LDD_F_NO_LOCAL_LOGS)) { (void)osd_label_lustre(mop); } else { struct lustre_disk_data ldd;