From: Andreas Dilger Date: Thu, 18 May 2023 21:41:47 +0000 (-0600) Subject: LU-15671 mds: do not send OST_CREATE transno interop X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=b85a12aa730dc3a93b5e35bf46d942b67ea06d4d;p=fs%2Flustre-release.git LU-15671 mds: do not send OST_CREATE transno interop Send OST_CREATE RPCs from the MDS with no_resend and no_delay when communicating with an old OST that does not support the OBD_CONNECT2_REPLAY_CREATE flag. Likewise, the OST should not reply to the MDS RPC with rq_transno set, or this will trigger: osp_precreate_send() ASSERTION(req->rq_transno == 0) failed This can be avoided if the MDS is upgraded before the OSS, but will always be hit if OSS is upgraded first. After 2.20.53 the MDS/OSS assume that this is always true, since rolling upgrades are unsupported for larger version differences. Lustre-change: https://review.whamcloud.com/51056 Lustre-commit: 9ee1281060d0a00a9c5d715a9a6d9b99c27123ff Test-Parameters: testgroup=rolling-upgrade-oss Fixes: 63e17799a3 ("LU-8367 osp: enable replay for precreation request") Signed-off-by: Andreas Dilger Signed-off-by: Sergey Cheremencev Change-Id: I1ab601a2f55540dd75cf24838f7cdb7f823ed42c Reviewed-by: Alex Zhuravlev Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51425 Tested-by: jenkins Tested-by: Maloo --- diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index 67d496a..464b7c8 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -45,8 +45,9 @@ #include #include -#include #include +#include +#include #include struct mds_client_data; @@ -496,6 +497,29 @@ static inline int exp_connect_dom_lvb(struct obd_export *exp) return !!(exp_connect_flags2(exp) & OBD_CONNECT2_DOM_LVB); } +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 20, 53, 0) +/* Only needed for interop with older MDS and 2.16+ OSS for rolling upgrade. 
+ * This is typically unsupported for long periods, especially between large + * version differences, so assume this is always true in the future + * and the OBD_CONNECT2_REPLAY_CREATE flag can be removed/reused in 2.21+. + */ +static inline bool exp_connect_replay_create(struct obd_export *exp) +{ + return exp_connect_flags2(exp) & OBD_CONNECT2_REPLAY_CREATE; +} + +static inline bool imp_connect_replay_create(struct obd_import *imp) +{ + struct obd_connect_data *ocd = &imp->imp_connect_data; + + return (ocd->ocd_connect_flags & OBD_CONNECT_FLAGS2) && + (ocd->ocd_connect_flags2 & OBD_CONNECT2_REPLAY_CREATE); +} +#else +#define exp_connect_replay_create(exp) true +#define imp_connect_replay_create(exp) true +#endif + enum { /* archive_ids in array format */ KKUC_CT_DATA_ARRAY_MAGIC = 0x092013cea, diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h index 3e5d295..53105ed 100644 --- a/lustre/include/uapi/linux/lustre/lustre_idl.h +++ b/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -868,6 +868,8 @@ struct ptlrpc_body_v2 { #define OBD_CONNECT2_ATOMIC_OPEN_LOCK 0x4000000ULL /* lock on first open */ #define OBD_CONNECT2_ENCRYPT_NAME 0x8000000ULL /* name encrypt */ #define OBD_CONNECT2_ENCRYPT_FID2PATH 0x40000000ULL /* fid2path enc file */ +/* For MDS+OSS rolling upgrade interop with 2.16+older, ignored after 2.20.53 */ +#define OBD_CONNECT2_REPLAY_CREATE 0x80000000ULL /* replay OST_CREATE */ #define OBD_CONNECT2_LARGE_NID 0x100000000ULL /* understands large/IPv6 NIDs */ #define OBD_CONNECT2_COMPRESS 0x200000000ULL /* compressed file */ /* XXX README XXX README XXX README XXX README XXX README XXX README XXX @@ -958,7 +960,9 @@ struct ptlrpc_body_v2 { OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2) #define OST_CONNECT_SUPPORTED2 (OBD_CONNECT2_LOCKAHEAD | OBD_CONNECT2_INC_XID |\ - OBD_CONNECT2_ENCRYPT | OBD_CONNECT2_LSEEK) + OBD_CONNECT2_ENCRYPT | OBD_CONNECT2_LSEEK | \ + OBD_CONNECT2_REP_MBITS |\ + 
OBD_CONNECT2_REPLAY_CREATE) #define ECHO_CONNECT_SUPPORTED (OBD_CONNECT_FID) #define ECHO_CONNECT_SUPPORTED2 0 diff --git a/lustre/lod/lod_lov.c b/lustre/lod/lod_lov.c index 5580ae7..f146b31 100644 --- a/lustre/lod/lod_lov.c +++ b/lustre/lod/lod_lov.c @@ -175,7 +175,8 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod, if (data == NULL) GOTO(out_cleanup, rc = -ENOMEM); - data->ocd_connect_flags = OBD_CONNECT_INDEX | OBD_CONNECT_VERSION; + data->ocd_connect_flags = OBD_CONNECT_INDEX | OBD_CONNECT_VERSION | + OBD_CONNECT_FLAGS2; data->ocd_version = LUSTRE_VERSION_CODE; data->ocd_index = index; @@ -196,6 +197,7 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod, OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK | OBD_CONNECT_BULK_MBITS; + data->ocd_connect_flags2 = OBD_CONNECT2_REPLAY_CREATE; data->ocd_group = tgt_index; ltd = &lod->lod_ost_descs; diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index 9139473..1c65fff 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -1645,6 +1645,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) } if (diff > 0) { time64_t enough_time = ktime_get_seconds() + DISK_TIMEOUT; + bool trans_local; u64 next_id; int created = 0; int count; @@ -1684,7 +1685,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) } } - + trans_local = !exp_connect_replay_create(req->rq_export); while (diff > 0) { next_id = ofd_seq_last_oid(oseq) + 1; count = ofd_precreate_batch(ofd, (int)diff); @@ -1703,7 +1704,8 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) } rc = ofd_precreate_objects(tsi->tsi_env, ofd, next_id, - oseq, count, sync_trans); + oseq, count, sync_trans, + trans_local); if (rc > 0) { created += rc; diff -= rc; diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index 0587826..2b0ee15 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -387,7 +387,8 @@ int ofd_object_ff_load(const struct lu_env *env, struct ofd_object *fo); int 
ofd_object_ff_update(const struct lu_env *env, struct ofd_object *fo, const struct obdo *oa, struct filter_fid *ff); int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd, - u64 id, struct ofd_seq *oseq, int nr, int sync); + u64 id, struct ofd_seq *oseq, int nr, int sync, + bool trans_local); static inline void ofd_object_put(const struct lu_env *env, struct ofd_object *fo) diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 5364153..852fb76 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -749,7 +749,8 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, int count = ofd_precreate_batch(ofd, diff); rc = ofd_precreate_objects(env, ofd, next_id, - oseq, count, sync); + oseq, count, sync, + false); if (rc < 0) { mutex_unlock(&oseq->os_create_lock); ofd_seq_put(env, oseq); diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index 6bb1441..d281e9b 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -1068,7 +1068,7 @@ static int ofd_echo_create(const struct lu_env *env, struct obd_export *exp, next_id = ofd_seq_last_oid(oseq) + 1; count = ofd_precreate_batch(ofd, (int)diff); - rc = ofd_precreate_objects(env, ofd, next_id, oseq, count, 0); + rc = ofd_precreate_objects(env, ofd, next_id, oseq, count, 0, false); if (rc < 0) { CERROR("%s: unable to precreate: rc = %d\n", ofd_name(ofd), rc); diff --git a/lustre/ofd/ofd_objects.c b/lustre/ofd/ofd_objects.c index 6d40ad4..ff13541 100644 --- a/lustre/ofd/ofd_objects.c +++ b/lustre/ofd/ofd_objects.c @@ -42,6 +42,7 @@ #include #include +#include #include "ofd_internal.h" @@ -237,18 +238,20 @@ static int ofd_precreate_cb_add(const struct lu_env *env, struct thandle *th, * update the inode. The ctime = 0 case is also handled specially in * osd_inode_setattr(). See LU-221, LU-1042 for details. 
* - * \param[in] env execution environment - * \param[in] ofd OFD device - * \param[in] id object ID to start precreation from - * \param[in] oseq object sequence - * \param[in] nr number of objects to precreate - * \param[in] sync synchronous precreation flag + * \param[in] env execution environment + * \param[in] ofd OFD device + * \param[in] id object ID to start precreation from + * \param[in] oseq object sequence + * \param[in] nr number of objects to precreate + * \param[in] sync synchronous precreation flag + * \param[in] trans_local start local transaction * * \retval 0 if successful * \retval negative value on error */ int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd, - u64 id, struct ofd_seq *oseq, int nr, int sync) + u64 id, struct ofd_seq *oseq, int nr, int sync, + bool trans_local) { struct ofd_thread_info *info = ofd_info(env); struct ofd_object *fo = NULL; @@ -360,7 +363,11 @@ int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd, } } - rc = dt_trans_start(env, ofd->ofd_osd, th); + /* Only needed for MDS+OSS rolling upgrade interop with 2.16+older. */ + if (unlikely(trans_local)) + rc = dt_trans_start_local(env, ofd->ofd_osd, th); + else + rc = dt_trans_start(env, ofd->ofd_osd, th); if (rc) GOTO(trans_stop, rc); diff --git a/lustre/osp/osp_precreate.c b/lustre/osp/osp_precreate.c index 9f0bf0e..837f7fb 100644 --- a/lustre/osp/osp_precreate.c +++ b/lustre/osp/osp_precreate.c @@ -636,6 +636,13 @@ static int osp_precreate_send(const struct lu_env *env, struct osp_device *d) RETURN(-ENOMEM); req->rq_request_portal = OST_CREATE_PORTAL; + /* We should not resend create request - anyway we will have delorphan + * and kill these objects. + * Only needed for MDS+OSS rolling upgrade interop with 2.16+older. + */ + if (unlikely(!imp_connect_replay_create(imp))) + req->rq_no_delay = req->rq_no_resend = 1; + /* Delorphan happens only with a first MDT-OST connect. 
resend/replay * handles objects creation on reconnects, no need to do delorhpan * in this case. @@ -859,7 +866,7 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env, struct osp_thread_info *osi = osp_env_info(env); struct lu_fid *last_fid = &osi->osi_fid; struct ptlrpc_request *req = NULL; - struct obd_import *imp; + struct obd_import *imp = d->opd_obd->u.cli.cl_import; struct ost_body *body; int update_status = 0; int rc; @@ -872,7 +879,7 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env, * all precreate requests uses resend/replay flags to support OST * failover/reconnect. */ - if (d->opd_cleanup_orphans_done) { + if (d->opd_cleanup_orphans_done && imp_connect_replay_create(imp)) { rc = osp_get_lastfid_from_ost(env, d, false); RETURN(0); } diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 85597b7..9d1680f 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -1280,6 +1280,7 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct obd_connect_data, paddingF)); LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingF) == 8, "found %lld\n", (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingF)); + LASSERTF(OBD_CONNECT_RDONLY == 0x1ULL, "found 0x%.16llxULL\n", OBD_CONNECT_RDONLY); LASSERTF(OBD_CONNECT_INDEX == 0x2ULL, "found 0x%.16llxULL\n", @@ -1402,6 +1403,7 @@ void lustre_assert_wire_constants(void) OBD_CONNECT_OBDOPACK); LASSERTF(OBD_CONNECT_FLAGS2 == 0x8000000000000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT_FLAGS2); + LASSERTF(OBD_CONNECT2_FILE_SECCTX == 0x1ULL, "found 0x%.16llxULL\n", OBD_CONNECT2_FILE_SECCTX); LASSERTF(OBD_CONNECT2_LOCKAHEAD == 0x2ULL, "found 0x%.16llxULL\n", @@ -1456,6 +1458,8 @@ void lustre_assert_wire_constants(void) OBD_CONNECT2_ENCRYPT_NAME); LASSERTF(OBD_CONNECT2_ENCRYPT_FID2PATH == 0x40000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT2_ENCRYPT_FID2PATH); + LASSERTF(OBD_CONNECT2_REPLAY_CREATE == 0x80000000ULL, "found 0x%.16llxULL\n", + 
OBD_CONNECT2_REPLAY_CREATE); LASSERTF(OBD_CONNECT2_LARGE_NID == 0x100000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT2_LARGE_NID); LASSERTF(OBD_CONNECT2_COMPRESS == 0x200000000ULL, "found 0x%.16llxULL\n", diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 17c0c64..61511d4 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -600,6 +600,7 @@ check_obd_connect_data(void) CHECK_MEMBER(obd_connect_data, paddingE); CHECK_MEMBER(obd_connect_data, paddingF); + BLANK_LINE(); CHECK_DEFINE_64X(OBD_CONNECT_RDONLY); CHECK_DEFINE_64X(OBD_CONNECT_INDEX); CHECK_DEFINE_64X(OBD_CONNECT_MDS); @@ -661,6 +662,7 @@ check_obd_connect_data(void) CHECK_DEFINE_64X(OBD_CONNECT_BULK_MBITS); CHECK_DEFINE_64X(OBD_CONNECT_OBDOPACK); CHECK_DEFINE_64X(OBD_CONNECT_FLAGS2); + BLANK_LINE(); CHECK_DEFINE_64X(OBD_CONNECT2_FILE_SECCTX); CHECK_DEFINE_64X(OBD_CONNECT2_LOCKAHEAD); CHECK_DEFINE_64X(OBD_CONNECT2_DIR_MIGRATE); @@ -688,6 +690,7 @@ check_obd_connect_data(void) CHECK_DEFINE_64X(OBD_CONNECT2_ATOMIC_OPEN_LOCK); CHECK_DEFINE_64X(OBD_CONNECT2_ENCRYPT_NAME); CHECK_DEFINE_64X(OBD_CONNECT2_ENCRYPT_FID2PATH); + CHECK_DEFINE_64X(OBD_CONNECT2_REPLAY_CREATE); CHECK_DEFINE_64X(OBD_CONNECT2_LARGE_NID); CHECK_DEFINE_64X(OBD_CONNECT2_COMPRESS); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 8e5c993..4384042 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -1438,6 +1438,7 @@ void lustre_assert_wire_constants(void) OBD_CONNECT_OBDOPACK); LASSERTF(OBD_CONNECT_FLAGS2 == 0x8000000000000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT_FLAGS2); + LASSERTF(OBD_CONNECT2_FILE_SECCTX == 0x1ULL, "found 0x%.16llxULL\n", OBD_CONNECT2_FILE_SECCTX); LASSERTF(OBD_CONNECT2_LOCKAHEAD == 0x2ULL, "found 0x%.16llxULL\n", @@ -1492,6 +1493,8 @@ void lustre_assert_wire_constants(void) OBD_CONNECT2_ENCRYPT_NAME); LASSERTF(OBD_CONNECT2_ENCRYPT_FID2PATH == 0x40000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT2_ENCRYPT_FID2PATH); + LASSERTF(OBD_CONNECT2_REPLAY_CREATE 
== 0x80000000ULL, "found 0x%.16llxULL\n", + OBD_CONNECT2_REPLAY_CREATE); LASSERTF(OBD_CONNECT2_LARGE_NID == 0x100000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT2_LARGE_NID); LASSERTF(OBD_CONNECT2_COMPRESS == 0x200000000ULL, "found 0x%.16llxULL\n",