From 9ee1281060d0a00a9c5d715a9a6d9b99c27123ff Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Thu, 18 May 2023 15:41:47 -0600 Subject: [PATCH] LU-15671 mds: do not send OST_CREATE transno interop Send OST_CREATE RPCs from the MDS with no_resend and no_delay when communicating with an old OST that does not support the OBD_CONNECT2_REPLAY_CREATE flag. Likewise, the OST should not reply to the MDS RPC with rq_transno set, or this will trigger: osp_precreate_send() ASSERTION(req->rq_transno == 0) failed This can be avoided if the MDS is upgraded before the OSS, but will always be hit if OSS is upgraded first. After 2.20.53 the MDS/OSS assume that this is always true, since rolling upgrades are unsupported for larger version differences. Test-Parameters: testgroup=rolling-upgrade-oss Fixes: 63e17799a3 ("LU-8367 osp: enable replay for precreation request") Signed-off-by: Andreas Dilger Signed-off-by: Sergey Cheremencev Change-Id: I1ab601a2f55540dd75cf24838f7cdb7f823ed42c Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51056 Tested-by: Maloo Tested-by: jenkins Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- lustre/include/lustre_export.h | 26 +++++++++++++++++++++++++- lustre/include/uapi/linux/lustre/lustre_idl.h | 5 ++++- lustre/lod/lod_lov.c | 4 +++- lustre/obdclass/lprocfs_status.c | 1 + lustre/ofd/ofd_dev.c | 6 ++++-- lustre/ofd/ofd_internal.h | 3 ++- lustre/ofd/ofd_io.c | 3 ++- lustre/ofd/ofd_obd.c | 2 +- lustre/ofd/ofd_objects.c | 23 +++++++++++++++-------- lustre/osp/osp_precreate.c | 11 +++++++++-- lustre/ptlrpc/wiretest.c | 5 +++++ lustre/utils/wirecheck.c | 4 ++++ lustre/utils/wiretest.c | 4 ++++ 13 files changed, 79 insertions(+), 18 deletions(-) diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index 4887865..de28a47 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -44,8 +44,9 @@ #include #include -#include #include +#include +#include #include struct mds_client_data; @@ 
-500,6 +501,29 @@ static inline int exp_connect_dom_lvb(struct obd_export *exp) return !!(exp_connect_flags2(exp) & OBD_CONNECT2_DOM_LVB); } +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 20, 53, 0) +/* Only needed for interop with older MDS and 2.16+ OSS for rolling upgrade. + * This is typically unsupported for long periods, especially between + * large version differences, so assume this is always true in the future + * and the OBD_CONNECT2_REPLAY_CREATE flag can be removed/reused in 2.21+. + */ +static inline bool exp_connect_replay_create(struct obd_export *exp) +{ + return exp_connect_flags2(exp) & OBD_CONNECT2_REPLAY_CREATE; +} + +static inline bool imp_connect_replay_create(struct obd_import *imp) +{ + struct obd_connect_data *ocd = &imp->imp_connect_data; + + return (ocd->ocd_connect_flags & OBD_CONNECT_FLAGS2) && + (ocd->ocd_connect_flags2 & OBD_CONNECT2_REPLAY_CREATE); +} +#else +#define exp_connect_replay_create(exp) true +#define imp_connect_replay_create(exp) true +#endif + enum { /* archive_ids in array format */ KKUC_CT_DATA_ARRAY_MAGIC = 0x092013cea, diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h index 8cbe132..e2e18b9 100644 --- a/lustre/include/uapi/linux/lustre/lustre_idl.h +++ b/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -846,6 +846,8 @@ struct ptlrpc_body_v2 { #define OBD_CONNECT2_ENCRYPT_NAME 0x8000000ULL /* name encrypt */ #define OBD_CONNECT2_DMV_IMP_INHERIT 0x20000000ULL /* client handle DMV inheritance */ #define OBD_CONNECT2_ENCRYPT_FID2PATH 0x40000000ULL /* fid2path enc file */ +/* For MDS+OSS rolling upgrade interop with 2.16+older, ignored after 2.20.53 */ +#define OBD_CONNECT2_REPLAY_CREATE 0x80000000ULL /* replay OST_CREATE */ /* XXX README XXX README XXX README XXX README XXX README XXX README XXX * Please DO NOT add OBD_CONNECT flags before first ensuring that this value * is not in use by some other branch/patch. 
Email adilger@whamcloud.com @@ -937,7 +939,8 @@ struct ptlrpc_body_v2 { #define OST_CONNECT_SUPPORTED2 (OBD_CONNECT2_LOCKAHEAD | OBD_CONNECT2_INC_XID |\ OBD_CONNECT2_ENCRYPT | OBD_CONNECT2_LSEEK |\ - OBD_CONNECT2_REP_MBITS) + OBD_CONNECT2_REP_MBITS |\ + OBD_CONNECT2_REPLAY_CREATE) #define ECHO_CONNECT_SUPPORTED (OBD_CONNECT_FID | OBD_CONNECT_FLAGS2) #define ECHO_CONNECT_SUPPORTED2 OBD_CONNECT2_REP_MBITS diff --git a/lustre/lod/lod_lov.c b/lustre/lod/lod_lov.c index a641090..b3322c5 100644 --- a/lustre/lod/lod_lov.c +++ b/lustre/lod/lod_lov.c @@ -175,7 +175,8 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod, if (data == NULL) GOTO(out_cleanup, rc = -ENOMEM); - data->ocd_connect_flags = OBD_CONNECT_INDEX | OBD_CONNECT_VERSION; + data->ocd_connect_flags = OBD_CONNECT_INDEX | OBD_CONNECT_VERSION | + OBD_CONNECT_FLAGS2; data->ocd_version = LUSTRE_VERSION_CODE; data->ocd_index = index; @@ -196,6 +197,7 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod, OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK | OBD_CONNECT_BULK_MBITS; + data->ocd_connect_flags2 = OBD_CONNECT2_REPLAY_CREATE; data->ocd_group = tgt_index; ltd = &lod->lod_ost_descs; diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 9ed044d..d552488 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -651,6 +651,7 @@ static const char *const obd_connect_names[] = { "mkdir_replay", /* 0x10000000 */ "dmv_imp_inherit", /* 0x20000000 */ "encryption_fid2path", /* 0x40000000 */ + "replay_create", /* 0x80000000 */ NULL }; diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index e1e97a9..fe5660d 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -1605,6 +1605,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) } if (diff > 0) { time64_t enough_time = ktime_get_seconds() + DISK_TIMEOUT; + bool trans_local; u64 next_id; int created = 0; int count; @@ -1644,7 +1645,7 @@ static int 
ofd_create_hdl(struct tgt_session_info *tsi) } } - + trans_local = !exp_connect_replay_create(req->rq_export); while (diff > 0) { next_id = ofd_seq_last_oid(oseq) + 1; count = ofd_precreate_batch(ofd, (int)diff); @@ -1663,7 +1664,8 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) } rc = ofd_precreate_objects(tsi->tsi_env, ofd, next_id, - oseq, count, sync_trans); + oseq, count, sync_trans, + trans_local); if (rc > 0) { created += rc; diff -= rc; diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index 951bc2e..8c7100e 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -382,7 +382,8 @@ int ofd_object_ff_load(const struct lu_env *env, struct ofd_object *fo); int ofd_object_ff_update(const struct lu_env *env, struct ofd_object *fo, const struct obdo *oa, struct filter_fid *ff); int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd, - u64 id, struct ofd_seq *oseq, int nr, int sync); + u64 id, struct ofd_seq *oseq, int nr, int sync, + bool trans_local); static inline void ofd_object_put(const struct lu_env *env, struct ofd_object *fo) diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 32e55d8..fc40364 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -748,7 +748,8 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, int count = ofd_precreate_batch(ofd, diff); rc = ofd_precreate_objects(env, ofd, next_id, - oseq, count, sync); + oseq, count, sync, + false); if (rc < 0) { mutex_unlock(&oseq->os_create_lock); ofd_seq_put(env, oseq); diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index be70c7c..494ee2f 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -1079,7 +1079,7 @@ static int ofd_echo_create(const struct lu_env *env, struct obd_export *exp, next_id = ofd_seq_last_oid(oseq) + 1; count = ofd_precreate_batch(ofd, (int)diff); - rc = ofd_precreate_objects(env, ofd, next_id, oseq, count, 0); + rc = ofd_precreate_objects(env, ofd, 
next_id, oseq, count, 0, false); if (rc < 0) { CERROR("%s: unable to precreate: rc = %d\n", ofd_name(ofd), rc); diff --git a/lustre/ofd/ofd_objects.c b/lustre/ofd/ofd_objects.c index c1d8c62..c258bed 100644 --- a/lustre/ofd/ofd_objects.c +++ b/lustre/ofd/ofd_objects.c @@ -41,6 +41,7 @@ #include #include +#include #include "ofd_internal.h" @@ -236,18 +237,20 @@ static int ofd_precreate_cb_add(const struct lu_env *env, struct thandle *th, * update the inode. The ctime = 0 case is also handled specially in * osd_inode_setattr(). See LU-221, LU-1042 for details. * - * \param[in] env execution environment - * \param[in] ofd OFD device - * \param[in] id object ID to start precreation from - * \param[in] oseq object sequence - * \param[in] nr number of objects to precreate - * \param[in] sync synchronous precreation flag + * \param[in] env execution environment + * \param[in] ofd OFD device + * \param[in] id object ID to start precreation from + * \param[in] oseq object sequence + * \param[in] nr number of objects to precreate + * \param[in] sync synchronous precreation flag + * \param[in] trans_local start local transaction * * \retval 0 if successful * \retval negative value on error */ int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd, - u64 id, struct ofd_seq *oseq, int nr, int sync) + u64 id, struct ofd_seq *oseq, int nr, int sync, + bool trans_local) { struct ofd_thread_info *info = ofd_info(env); struct ofd_object *fo = NULL; @@ -359,7 +362,11 @@ int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd, } } - rc = dt_trans_start(env, ofd->ofd_osd, th); + /* Only needed for MDS+OSS rolling upgrade interop with 2.16+older. 
*/ + if (unlikely(trans_local)) + rc = dt_trans_start_local(env, ofd->ofd_osd, th); + else + rc = dt_trans_start(env, ofd->ofd_osd, th); if (rc) GOTO(trans_stop, rc); diff --git a/lustre/osp/osp_precreate.c b/lustre/osp/osp_precreate.c index ba10932..cabc772 100644 --- a/lustre/osp/osp_precreate.c +++ b/lustre/osp/osp_precreate.c @@ -632,6 +632,13 @@ static int osp_precreate_send(const struct lu_env *env, struct osp_device *d) RETURN(-ENOMEM); req->rq_request_portal = OST_CREATE_PORTAL; + /* We should not resend create request - anyway we will have delorphan + * and kill these objects. + * Only needed for MDS+OSS rolling upgrade interop with 2.16+older. + */ + if (unlikely(!imp_connect_replay_create(imp))) + req->rq_no_delay = req->rq_no_resend = 1; + /* Delorphan happens only with a first MDT-OST connect. resend/replay * handles objects creation on reconnects, no need to do delorhpan * in this case. @@ -859,7 +866,7 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env, struct osp_thread_info *osi = osp_env_info(env); struct lu_fid *last_fid = &osi->osi_fid; struct ptlrpc_request *req = NULL; - struct obd_import *imp; + struct obd_import *imp = d->opd_obd->u.cli.cl_import; struct ost_body *body; int update_status = 0; int rc; @@ -872,7 +879,7 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env, * all precreate requests uses resend/replay flags to support OST * failover/reconnect. 
*/ - if (d->opd_cleanup_orphans_done) { + if (d->opd_cleanup_orphans_done && imp_connect_replay_create(imp)) { rc = osp_get_lastfid_from_ost(env, d, false); RETURN(0); } diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 88b67a3..58e0610 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -1265,6 +1265,7 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct obd_connect_data, paddingF)); LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingF) == 8, "found %lld\n", (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingF)); + LASSERTF(OBD_CONNECT_RDONLY == 0x1ULL, "found 0x%.16llxULL\n", OBD_CONNECT_RDONLY); LASSERTF(OBD_CONNECT_INDEX == 0x2ULL, "found 0x%.16llxULL\n", @@ -1387,6 +1388,7 @@ void lustre_assert_wire_constants(void) OBD_CONNECT_OBDOPACK); LASSERTF(OBD_CONNECT_FLAGS2 == 0x8000000000000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT_FLAGS2); + LASSERTF(OBD_CONNECT2_FILE_SECCTX == 0x1ULL, "found 0x%.16llxULL\n", OBD_CONNECT2_FILE_SECCTX); LASSERTF(OBD_CONNECT2_LOCKAHEAD == 0x2ULL, "found 0x%.16llxULL\n", @@ -1443,6 +1445,9 @@ void lustre_assert_wire_constants(void) OBD_CONNECT2_DMV_IMP_INHERIT); LASSERTF(OBD_CONNECT2_ENCRYPT_FID2PATH == 0x40000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT2_ENCRYPT_FID2PATH); + LASSERTF(OBD_CONNECT2_REPLAY_CREATE == 0x80000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT2_REPLAY_CREATE); + LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n", (unsigned)OBD_CKSUM_CRC32); LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n", diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index faf0c0b..cb26884 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -591,6 +591,7 @@ check_obd_connect_data(void) CHECK_MEMBER(obd_connect_data, paddingE); CHECK_MEMBER(obd_connect_data, paddingF); + BLANK_LINE(); CHECK_DEFINE_64X(OBD_CONNECT_RDONLY); CHECK_DEFINE_64X(OBD_CONNECT_INDEX); CHECK_DEFINE_64X(OBD_CONNECT_MDS); @@ -652,6 
+653,7 @@ check_obd_connect_data(void) CHECK_DEFINE_64X(OBD_CONNECT_BULK_MBITS); CHECK_DEFINE_64X(OBD_CONNECT_OBDOPACK); CHECK_DEFINE_64X(OBD_CONNECT_FLAGS2); + BLANK_LINE(); CHECK_DEFINE_64X(OBD_CONNECT2_FILE_SECCTX); CHECK_DEFINE_64X(OBD_CONNECT2_LOCKAHEAD); CHECK_DEFINE_64X(OBD_CONNECT2_DIR_MIGRATE); @@ -680,7 +682,9 @@ check_obd_connect_data(void) CHECK_DEFINE_64X(OBD_CONNECT2_ENCRYPT_NAME); CHECK_DEFINE_64X(OBD_CONNECT2_DMV_IMP_INHERIT); CHECK_DEFINE_64X(OBD_CONNECT2_ENCRYPT_FID2PATH); + CHECK_DEFINE_64X(OBD_CONNECT2_REPLAY_CREATE); + BLANK_LINE(); CHECK_VALUE_X(OBD_CKSUM_CRC32); CHECK_VALUE_X(OBD_CKSUM_ADLER); CHECK_VALUE_X(OBD_CKSUM_CRC32C); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index c5dd604..7dfedcc 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -1413,6 +1413,7 @@ void lustre_assert_wire_constants(void) OBD_CONNECT_OBDOPACK); LASSERTF(OBD_CONNECT_FLAGS2 == 0x8000000000000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT_FLAGS2); + LASSERTF(OBD_CONNECT2_FILE_SECCTX == 0x1ULL, "found 0x%.16llxULL\n", OBD_CONNECT2_FILE_SECCTX); LASSERTF(OBD_CONNECT2_LOCKAHEAD == 0x2ULL, "found 0x%.16llxULL\n", @@ -1469,6 +1470,9 @@ void lustre_assert_wire_constants(void) OBD_CONNECT2_DMV_IMP_INHERIT); LASSERTF(OBD_CONNECT2_ENCRYPT_FID2PATH == 0x40000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT2_ENCRYPT_FID2PATH); + LASSERTF(OBD_CONNECT2_REPLAY_CREATE == 0x80000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT2_REPLAY_CREATE); + LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n", (unsigned)OBD_CKSUM_CRC32); LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n", -- 1.8.3.1