From dd9e79b64d49f6d66d121f072e0e7516963b4da0 Mon Sep 17 00:00:00 2001 From: Hongchao Zhang Date: Sun, 12 Apr 2020 20:40:27 +0800 Subject: [PATCH] LU-12546 mdt: abort recovery between MDTs Add an option to abort recovery between MDTs in case there is a problem during recovery (e.g. MDT is missing or has broken logs), but don't abort recovery between MDT and clients. Change-Id: Id88f2b2ebae5cfa722dcac67c087b9b9a448721e Signed-off-by: Hongchao Zhang Reviewed-on: https://review.whamcloud.com/36027 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Lai Siyao --- lustre/include/lustre_disk.h | 1 + lustre/include/obd.h | 1 + lustre/include/uapi/linux/lustre/lustre_ioctl.h | 4 +++ lustre/ldlm/ldlm_lib.c | 10 ++++---- lustre/mdt/mdt_handler.c | 12 +++++++-- lustre/obdclass/obd_mount.c | 3 +++ lustre/obdclass/obd_mount_server.c | 9 +++++-- lustre/tests/replay-single.sh | 29 +++++++++++++++++++++ lustre/tests/test-framework.sh | 9 ++++--- lustre/utils/lctl.c | 2 ++ lustre/utils/obd.c | 34 +++++++++++++++++-------- lustre/utils/obdctl.h | 1 + 12 files changed, 93 insertions(+), 22 deletions(-) diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index 45222b0..66ec909 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -108,6 +108,7 @@ struct lustre_mount_data { #define LMD_FLG_DEV_RDONLY 0x8000 /* discard modification quitely */ #define LMD_FLG_NO_PRECREATE 0x10000 /* do not allow OST object creation */ #define LMD_FLG_LOCAL_RECOV 0x20000 /* force recovery for local clients */ +#define LMD_FLG_ABORT_RECOV_MDT 0x40000 /* Abort recovery between MDTs */ #define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT) diff --git a/lustre/include/obd.h b/lustre/include/obd.h index be04e32..d057d3e 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -619,6 +619,7 @@ struct obd_device { obd_set_up:1, /* finished setup */ obd_recovering:1, /* there are recoverable clients */ obd_abort_recovery:1, /* 
recovery expired */ + obd_abort_recov_mdt:1, /* only abort recovery between MDTs */ obd_version_recov:1, /* obd uses version checking */ obd_replayable:1, /* recovery enabled; inform clients */ obd_no_recov:1, /* fail instead of retry messages */ diff --git a/lustre/include/uapi/linux/lustre/lustre_ioctl.h b/lustre/include/uapi/linux/lustre/lustre_ioctl.h index 9efcb1a..43dbf5a 100644 --- a/lustre/include/uapi/linux/lustre/lustre_ioctl.h +++ b/lustre/include/uapi/linux/lustre/lustre_ioctl.h @@ -171,6 +171,10 @@ static inline __u32 obd_ioctl_packlen(struct obd_ioctl_data *data) /* was OBD_IOC_NO_TRANSNO _IOW('f', 140, OBD_IOC_DATA_TYPE) until 2.14 */ #define OBD_IOC_SET_READONLY _IOW('f', 141, OBD_IOC_DATA_TYPE) #define OBD_IOC_ABORT_RECOVERY _IOR('f', 142, OBD_IOC_DATA_TYPE) +enum obd_abort_recovery_flags { + OBD_FLG_ABORT_RECOV_OST = 0x00008, /* LMD_FLG_ABORT_RECOV */ + OBD_FLG_ABORT_RECOV_MDT = 0x40000, /* LMD_FLG_ABORT_RECOV_MDT */ +}; /* OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE) */ #define OBD_GET_VERSION _IOWR('f', 144, OBD_IOC_DATA_TYPE) /* OBD_IOC_GSS_SUPPORT _IOWR('f', 145, OBD_IOC_DATA_TYPE) */ diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index e2605a5..927a424 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -2073,7 +2073,7 @@ static int check_for_next_transno(struct lu_target *lut) req_transno = lustre_msg_get_transno(req->rq_reqmsg); } - if (tdtd != NULL) + if (!obd->obd_abort_recov_mdt && tdtd) update_transno = distribute_txn_get_next_transno(tdtd); connected = atomic_read(&obd->obd_connected_clients); @@ -2093,7 +2093,7 @@ static int check_for_next_transno(struct lu_target *lut) } else if (obd->obd_recovery_expired) { CDEBUG(D_HA, "waking for expired recovery\n"); wake_up = 1; - } else if (tdtd != NULL && req != NULL && + } else if (!obd->obd_abort_recov_mdt && tdtd && req && is_req_replayed_by_update(req)) { LASSERTF(req_transno < next_transno, "req_transno %llu next_transno%llu\n", req_transno, @@ 
-2205,7 +2205,7 @@ repeat: * left in the queue */ spin_lock(&obd->obd_recovery_task_lock); - if (lut->lut_tdtd != NULL) { + if (!obd->obd_abort_recov_mdt && lut->lut_tdtd) { next_update_transno = distribute_txn_get_next_transno(lut->lut_tdtd); @@ -2432,7 +2432,7 @@ static int check_for_recovery_ready(struct lu_target *lut) return 0; } - if (lut->lut_tdtd != NULL) { + if (!obd->obd_abort_recov_mdt && lut->lut_tdtd != NULL) { if (!lut->lut_tdtd->tdtd_replay_ready && !obd->obd_abort_recovery && !obd->obd_stopping) { /* @@ -2484,7 +2484,7 @@ static __u64 get_next_transno(struct lu_target *lut, int *type) if (type != NULL) *type = REQUEST_RECOVERY; - if (tdtd == NULL) + if (!tdtd || obd->obd_abort_recov_mdt) RETURN(transno); update_transno = distribute_txn_get_next_transno(tdtd); diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 55baf47..65c956b 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -7095,12 +7095,20 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, if (rc == 0) rc = dt_ro(&env, dt); break; - case OBD_IOC_ABORT_RECOVERY: + case OBD_IOC_ABORT_RECOVERY: { + struct obd_ioctl_data *data = karg; + CERROR("%s: Aborting recovery for device\n", mdt_obd_name(mdt)); - obd->obd_abort_recovery = 1; + if (data->ioc_type & OBD_FLG_ABORT_RECOV_MDT) + obd->obd_abort_recov_mdt = 1; + else /* if (data->ioc_type & OBD_FLG_ABORT_RECOV_OST) */ + /* lctl didn't set OBD_FLG_ABORT_RECOV_OST < 2.13.57 */ + obd->obd_abort_recovery = 1; + target_stop_recovery_thread(obd); rc = 0; break; + } case OBD_IOC_CHANGELOG_REG: case OBD_IOC_CHANGELOG_DEREG: case OBD_IOC_CHANGELOG_CLEAR: diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 164d773..3ec7d9c 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -1350,6 +1350,10 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) - if (strncmp(s1, "abort_recov", 11) == 0) { + if (strncmp(s1, "abort_recov_mdt", 15) == 0) { + /* must be tested before its prefix "abort_recov" */ + lmd->lmd_flags |= LMD_FLG_ABORT_RECOV_MDT; + clear++; + } else if (strncmp(s1, "abort_recov", 11) == 0) { lmd->lmd_flags |= LMD_FLG_ABORT_RECOV; 
clear++; } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) { lmd->lmd_recovery_time_soft = max_t(int, simple_strtoul(s1 + 19, NULL, 10), diff --git a/lustre/obdclass/obd_mount_server.c b/lustre/obdclass/obd_mount_server.c index 55bbddf..fa4de07 100644 --- a/lustre/obdclass/obd_mount_server.c +++ b/lustre/obdclass/obd_mount_server.c @@ -1471,10 +1471,15 @@ static int server_start_targets(struct super_block *sb) /* abort recovery only on the complete stack: * many devices can be involved */ - if ((lsi->lsi_lmd->lmd_flags & LMD_FLG_ABORT_RECOV) && + if ((lsi->lsi_lmd->lmd_flags & + (LMD_FLG_ABORT_RECOV | LMD_FLG_ABORT_RECOV_MDT)) && (OBP(obd, iocontrol))) { + struct obd_ioctl_data karg = { + .ioc_type = lsi->lsi_lmd->lmd_flags, + }; + obd_iocontrol(OBD_IOC_ABORT_RECOVERY, obd->obd_self_export, 0, - NULL, NULL); + &karg, NULL); } out_mgc: diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index bba8635..5eac2d7 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -3530,6 +3530,35 @@ test_100b() { } run_test 100b "DNE: create striped dir, fail MDT0" +test_100c() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + ([ $FAILURE_MODE == "HARD" ] && + [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) && + skip "MDTs needs to be on diff hosts for HARD fail mode" && + return 0 + + local striped_dir=$DIR/$tdir/striped_dir + + mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed" + + #To make sure MDT1 and MDT0 are connected + #otherwise it may create single stripe dir here + $LFS setdirstripe -i1 $DIR/$tdir/remote_dir + + replay_barrier mds2 + $LFS mkdir -i1 -c2 $striped_dir + + fail_abort mds2 abort_recov_mdt + + createmany -o $striped_dir/f-%d 20 && + error "createmany -o $DIR/$tfile should fail" + + fail mds2 + striped_dir_check_100 || error "striped dir check failed" + rm -rf $DIR/$tdir || error 
"rmdir failed" +} +run_test 100c "DNE: create striped dir, fail MDT0" + test_101() { #LU-5648 mkdir -p $DIR/$tdir/d1 mkdir -p $DIR/$tdir/d2 diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 6cfde4d..62c75bf 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -3763,16 +3763,19 @@ fail() { } fail_nodf() { - local facet=$1 - facet_failover $facet + local facet=$1 + + facet_failover $facet } fail_abort() { local facet=$1 + local abort_type=${2:-"abort_recovery"} + stop $facet change_active $facet wait_for_facet $facet - mount_facet $facet -o abort_recovery + mount_facet $facet -o $abort_type clients_up || echo "first stat failed: $?" clients_up || error "post-failover stat: $?" } diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index dde98b3..b32b3d1 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -192,6 +192,8 @@ command_t cmdlist[] = { "This command should be used on failed OSC devices in an MDT LOV.\n"}, {"abort_recovery", jt_obd_abort_recovery, 0, "abort recovery on a restarting MDT or OST device\n"}, + {"abort_recovery_mdt", jt_obd_abort_recovery_mdt, 0, + "abort recovery between MDTs\n"}, {"set_timeout", jt_lcfg_set_timeout, 0, "usage: conf_param obd_timeout=\n"}, #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0) diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index 2940dfe..1b033fd 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -72,6 +72,7 @@ #include #endif #include +#include #include #include #include @@ -934,33 +935,46 @@ int jt_obd_set_readonly(int argc, char **argv) return rc; } -int jt_obd_abort_recovery(int argc, char **argv) +static int obd_abort_recovery(char *cmd, enum obd_abort_recovery_flags flags) { - struct obd_ioctl_data data; + struct obd_ioctl_data data = { + .ioc_dev = cur_device, + .ioc_type = flags, + }; char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf; int rc; - memset(&data, 0, sizeof(data)); - data.ioc_dev = cur_device; - - if (argc != 1) 
- return CMD_HELP; - memset(buf, 0, sizeof(rawbuf)); rc = llapi_ioctl_pack(&data, &buf, sizeof(rawbuf)); if (rc) { fprintf(stderr, "error: %s: invalid ioctl\n", - jt_cmdname(argv[0])); + jt_cmdname(cmd)); return rc; } rc = l_ioctl(OBD_DEV_ID, OBD_IOC_ABORT_RECOVERY, buf); if (rc < 0) - fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]), + fprintf(stderr, "error: %s: %s\n", jt_cmdname(cmd), strerror(rc = errno)); return rc; } +int jt_obd_abort_recovery(int argc, char **argv) +{ + if (argc != 1) + return CMD_HELP; + + return obd_abort_recovery(argv[0], OBD_FLG_ABORT_RECOV_OST); +} + +int jt_obd_abort_recovery_mdt(int argc, char **argv) +{ + if (argc != 1) + return CMD_HELP; + + return obd_abort_recovery(argv[0], OBD_FLG_ABORT_RECOV_MDT); +} + int jt_get_version(int argc, char **argv) { char version[128]; diff --git a/lustre/utils/obdctl.h b/lustre/utils/obdctl.h index cf06edd..8fbf115 100644 --- a/lustre/utils/obdctl.h +++ b/lustre/utils/obdctl.h @@ -101,6 +101,7 @@ int jt_obd_cleanup(int argc, char **argv); int jt_obd_no_transno(int argc, char **argv); int jt_obd_set_readonly(int argc, char **argv); int jt_obd_abort_recovery(int argc, char **argv); +int jt_obd_abort_recovery_mdt(int argc, char **argv); int jt_obd_list(int argc, char **argv); int jt_obd_create(int argc, char **argv); int jt_obd_test_create(int argc, char **argv); -- 1.8.3.1