Whamcloud - gitweb
LU-15671 mds: do not send OST_CREATE transno interop 56/51056/10
authorAndreas Dilger <adilger@whamcloud.com>
Thu, 18 May 2023 21:41:47 +0000 (15:41 -0600)
committerOleg Drokin <green@whamcloud.com>
Tue, 20 Jun 2023 03:35:45 +0000 (03:35 +0000)
Send OST_CREATE RPCs from the MDS with no_resend and no_delay
when communicating with an old OST that does not support the
OBD_CONNECT2_REPLAY_CREATE flag.  Likewise, the OST should not reply
to the MDS RPC with rq_transno set, or this will trigger:

   osp_precreate_send() ASSERTION(req->rq_transno == 0) failed

This can be avoided if the MDS is upgraded before the OSS, but
will always be hit if the OSS is upgraded first.

After 2.20.53 the MDS/OSS assume that OST_CREATE replay is always
supported by the peer, since rolling upgrades are unsupported for
larger version differences.

Test-Parameters: testgroup=rolling-upgrade-oss
Fixes: 63e17799a3 ("LU-8367 osp: enable replay for precreation request")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Signed-off-by: Sergey Cheremencev <scherementsev@ddn.com>
Change-Id: I1ab601a2f55540dd75cf24838f7cdb7f823ed42c
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51056
Tested-by: Maloo <maloo@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
13 files changed:
lustre/include/lustre_export.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/lod/lod_lov.c
lustre/obdclass/lprocfs_status.c
lustre/ofd/ofd_dev.c
lustre/ofd/ofd_internal.h
lustre/ofd/ofd_io.c
lustre/ofd/ofd_obd.c
lustre/ofd/ofd_objects.c
lustre/osp/osp_precreate.c
lustre/ptlrpc/wiretest.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index 4887865..de28a47 100644 (file)
@@ -44,8 +44,9 @@
 #include <linux/rhashtable.h>
 #include <linux/workqueue.h>
 
-#include <lprocfs_status.h>
 #include <uapi/linux/lustre/lustre_idl.h>
+#include <uapi/linux/lustre/lustre_ver.h>
+#include <lprocfs_status.h>
 #include <lustre_dlm.h>
 
 struct mds_client_data;
@@ -500,6 +501,29 @@ static inline int exp_connect_dom_lvb(struct obd_export *exp)
        return !!(exp_connect_flags2(exp) & OBD_CONNECT2_DOM_LVB);
 }
 
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 20, 53, 0)
+/* Only needed for interop with older MDS and 2.16+ OSS for rolling upgrade.
+ * This is typically unsupported for long periods, especially between large
+ * version differences, so assume this is always true in the future
+ * and the OBD_CONNECT2_REPLAY_CREATE flag can be removed/reused in 2.21+.
+ */
+static inline bool exp_connect_replay_create(struct obd_export *exp)
+{
+       return exp_connect_flags2(exp) & OBD_CONNECT2_REPLAY_CREATE;
+}
+
+static inline bool imp_connect_replay_create(struct obd_import *imp)
+{
+       struct obd_connect_data *ocd = &imp->imp_connect_data;
+
+       return (ocd->ocd_connect_flags & OBD_CONNECT_FLAGS2) &&
+               (ocd->ocd_connect_flags2 & OBD_CONNECT2_REPLAY_CREATE);
+}
+#else
+#define exp_connect_replay_create(exp) true
+#define imp_connect_replay_create(exp) true
+#endif
+
 enum {
        /* archive_ids in array format */
        KKUC_CT_DATA_ARRAY_MAGIC        = 0x092013cea,
index 8cbe132..e2e18b9 100644 (file)
@@ -846,6 +846,8 @@ struct ptlrpc_body_v2 {
 #define OBD_CONNECT2_ENCRYPT_NAME        0x8000000ULL /* name encrypt */
 #define OBD_CONNECT2_DMV_IMP_INHERIT    0x20000000ULL /* client handle DMV inheritance */
 #define OBD_CONNECT2_ENCRYPT_FID2PATH   0x40000000ULL /* fid2path enc file */
+/* For MDS+OSS rolling upgrade interop with 2.16+older, ignored after 2.20.53 */
+#define OBD_CONNECT2_REPLAY_CREATE      0x80000000ULL /* replay OST_CREATE */
 /* XXX README XXX README XXX README XXX README XXX README XXX README XXX
  * Please DO NOT add OBD_CONNECT flags before first ensuring that this value
  * is not in use by some other branch/patch.  Email adilger@whamcloud.com
@@ -937,7 +939,8 @@ struct ptlrpc_body_v2 {
 
 #define OST_CONNECT_SUPPORTED2 (OBD_CONNECT2_LOCKAHEAD | OBD_CONNECT2_INC_XID |\
                                OBD_CONNECT2_ENCRYPT | OBD_CONNECT2_LSEEK |\
-                               OBD_CONNECT2_REP_MBITS)
+                               OBD_CONNECT2_REP_MBITS |\
+                               OBD_CONNECT2_REPLAY_CREATE)
 
 #define ECHO_CONNECT_SUPPORTED (OBD_CONNECT_FID | OBD_CONNECT_FLAGS2)
 #define ECHO_CONNECT_SUPPORTED2 OBD_CONNECT2_REP_MBITS
index a641090..b3322c5 100644 (file)
@@ -175,7 +175,8 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod,
        if (data == NULL)
                GOTO(out_cleanup, rc = -ENOMEM);
 
-       data->ocd_connect_flags = OBD_CONNECT_INDEX | OBD_CONNECT_VERSION;
+       data->ocd_connect_flags = OBD_CONNECT_INDEX | OBD_CONNECT_VERSION |
+                                 OBD_CONNECT_FLAGS2;
        data->ocd_version = LUSTRE_VERSION_CODE;
        data->ocd_index = index;
 
@@ -196,6 +197,7 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod,
                                           OBD_CONNECT_PINGLESS |
                                           OBD_CONNECT_LFSCK |
                                           OBD_CONNECT_BULK_MBITS;
+               data->ocd_connect_flags2 = OBD_CONNECT2_REPLAY_CREATE;
 
                data->ocd_group = tgt_index;
                ltd = &lod->lod_ost_descs;
index 9ed044d..d552488 100644 (file)
@@ -651,6 +651,7 @@ static const char *const obd_connect_names[] = {
        "mkdir_replay",                 /* 0x10000000 */
        "dmv_imp_inherit",              /* 0x20000000 */
        "encryption_fid2path",          /* 0x40000000 */
+       "replay_create",                /* 0x80000000 */
        NULL
 };
 
index e1e97a9..fe5660d 100644 (file)
@@ -1605,6 +1605,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
        }
        if (diff > 0) {
                time64_t enough_time = ktime_get_seconds() + DISK_TIMEOUT;
+               bool trans_local;
                u64 next_id;
                int created = 0;
                int count;
@@ -1644,7 +1645,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
                        }
                }
 
-
+               trans_local = !exp_connect_replay_create(req->rq_export);
                while (diff > 0) {
                        next_id = ofd_seq_last_oid(oseq) + 1;
                        count = ofd_precreate_batch(ofd, (int)diff);
@@ -1663,7 +1664,8 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
                        }
 
                        rc = ofd_precreate_objects(tsi->tsi_env, ofd, next_id,
-                                                  oseq, count, sync_trans);
+                                                  oseq, count, sync_trans,
+                                                  trans_local);
                        if (rc > 0) {
                                created += rc;
                                diff -= rc;
index 951bc2e..8c7100e 100644 (file)
@@ -382,7 +382,8 @@ int ofd_object_ff_load(const struct lu_env *env, struct ofd_object *fo);
 int ofd_object_ff_update(const struct lu_env *env, struct ofd_object *fo,
                         const struct obdo *oa, struct filter_fid *ff);
 int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd,
-                         u64 id, struct ofd_seq *oseq, int nr, int sync);
+                         u64 id, struct ofd_seq *oseq, int nr, int sync,
+                         bool trans_local);
 
 static inline void ofd_object_put(const struct lu_env *env,
                                  struct ofd_object *fo)
index 32e55d8..fc40364 100644 (file)
@@ -748,7 +748,8 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                                int count = ofd_precreate_batch(ofd, diff);
 
                                rc = ofd_precreate_objects(env, ofd, next_id,
-                                                          oseq, count, sync);
+                                                          oseq, count, sync,
+                                                          false);
                                if (rc < 0) {
                                        mutex_unlock(&oseq->os_create_lock);
                                        ofd_seq_put(env, oseq);
index be70c7c..494ee2f 100644 (file)
@@ -1079,7 +1079,7 @@ static int ofd_echo_create(const struct lu_env *env, struct obd_export *exp,
        next_id = ofd_seq_last_oid(oseq) + 1;
        count = ofd_precreate_batch(ofd, (int)diff);
 
-       rc = ofd_precreate_objects(env, ofd, next_id, oseq, count, 0);
+       rc = ofd_precreate_objects(env, ofd, next_id, oseq, count, 0, false);
        if (rc < 0) {
                CERROR("%s: unable to precreate: rc = %d\n",
                       ofd_name(ofd), rc);
index c1d8c62..c258bed 100644 (file)
@@ -41,6 +41,7 @@
 
 #include <dt_object.h>
 #include <lustre_lfsck.h>
+#include <lustre_export.h>
 
 #include "ofd_internal.h"
 
@@ -236,18 +237,20 @@ static int ofd_precreate_cb_add(const struct lu_env *env, struct thandle *th,
  * update the inode. The ctime = 0 case is also handled specially in
  * osd_inode_setattr(). See LU-221, LU-1042 for details.
  *
- * \param[in] env      execution environment
- * \param[in] ofd      OFD device
- * \param[in] id       object ID to start precreation from
- * \param[in] oseq     object sequence
- * \param[in] nr       number of objects to precreate
- * \param[in] sync     synchronous precreation flag
+ * \param[in] env              execution environment
+ * \param[in] ofd              OFD device
+ * \param[in] id               object ID to start precreation from
+ * \param[in] oseq             object sequence
+ * \param[in] nr               number of objects to precreate
+ * \param[in] sync             synchronous precreation flag
+ * \param[in] trans_local      start local transaction
  *
  * \retval             0 if successful
  * \retval             negative value on error
  */
 int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd,
-                         u64 id, struct ofd_seq *oseq, int nr, int sync)
+                         u64 id, struct ofd_seq *oseq, int nr, int sync,
+                         bool trans_local)
 {
        struct ofd_thread_info  *info = ofd_info(env);
        struct ofd_object       *fo = NULL;
@@ -359,7 +362,11 @@ int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd,
                }
        }
 
-       rc = dt_trans_start(env, ofd->ofd_osd, th);
+       /* Only needed for MDS+OSS rolling upgrade interop with 2.16+older. */
+       if (unlikely(trans_local))
+               rc = dt_trans_start_local(env, ofd->ofd_osd, th);
+       else
+               rc = dt_trans_start(env, ofd->ofd_osd, th);
        if (rc)
                GOTO(trans_stop, rc);
 
index ba10932..cabc772 100644 (file)
@@ -632,6 +632,13 @@ static int osp_precreate_send(const struct lu_env *env, struct osp_device *d)
                RETURN(-ENOMEM);
        req->rq_request_portal = OST_CREATE_PORTAL;
 
+       /* We should not resend the create request - delorphan will run
+        * anyway and kill these objects.
+        * Only needed for MDS+OSS rolling upgrade interop with 2.16+older.
+        */
+       if (unlikely(!imp_connect_replay_create(imp)))
+               req->rq_no_delay = req->rq_no_resend = 1;
+
        /* Delorphan happens only with a first MDT-OST connect. resend/replay
         * handles objects creation on reconnects, no need to do delorhpan
         * in this case.
@@ -859,7 +866,7 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
        struct osp_thread_info  *osi = osp_env_info(env);
        struct lu_fid           *last_fid = &osi->osi_fid;
        struct ptlrpc_request   *req = NULL;
-       struct obd_import       *imp;
+       struct obd_import       *imp = d->opd_obd->u.cli.cl_import;
        struct ost_body         *body;
        int                      update_status = 0;
        int                      rc;
@@ -872,7 +879,7 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env,
         * all precreate requests uses resend/replay flags to support OST
         * failover/reconnect.
         */
-       if (d->opd_cleanup_orphans_done) {
+       if (d->opd_cleanup_orphans_done && imp_connect_replay_create(imp)) {
                rc = osp_get_lastfid_from_ost(env, d, false);
                RETURN(0);
        }
index 88b67a3..58e0610 100644 (file)
@@ -1265,6 +1265,7 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct obd_connect_data, paddingF));
        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingF) == 8, "found %lld\n",
                 (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingF));
+
        LASSERTF(OBD_CONNECT_RDONLY == 0x1ULL, "found 0x%.16llxULL\n",
                 OBD_CONNECT_RDONLY);
        LASSERTF(OBD_CONNECT_INDEX == 0x2ULL, "found 0x%.16llxULL\n",
@@ -1387,6 +1388,7 @@ void lustre_assert_wire_constants(void)
                 OBD_CONNECT_OBDOPACK);
        LASSERTF(OBD_CONNECT_FLAGS2 == 0x8000000000000000ULL, "found 0x%.16llxULL\n",
                 OBD_CONNECT_FLAGS2);
+
        LASSERTF(OBD_CONNECT2_FILE_SECCTX == 0x1ULL, "found 0x%.16llxULL\n",
                 OBD_CONNECT2_FILE_SECCTX);
        LASSERTF(OBD_CONNECT2_LOCKAHEAD == 0x2ULL, "found 0x%.16llxULL\n",
@@ -1443,6 +1445,9 @@ void lustre_assert_wire_constants(void)
                 OBD_CONNECT2_DMV_IMP_INHERIT);
        LASSERTF(OBD_CONNECT2_ENCRYPT_FID2PATH == 0x40000000ULL, "found 0x%.16llxULL\n",
                 OBD_CONNECT2_ENCRYPT_FID2PATH);
+       LASSERTF(OBD_CONNECT2_REPLAY_CREATE == 0x80000000ULL, "found 0x%.16llxULL\n",
+                OBD_CONNECT2_REPLAY_CREATE);
+
        LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
                (unsigned)OBD_CKSUM_CRC32);
        LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
index faf0c0b..cb26884 100644 (file)
@@ -591,6 +591,7 @@ check_obd_connect_data(void)
        CHECK_MEMBER(obd_connect_data, paddingE);
        CHECK_MEMBER(obd_connect_data, paddingF);
 
+       BLANK_LINE();
        CHECK_DEFINE_64X(OBD_CONNECT_RDONLY);
        CHECK_DEFINE_64X(OBD_CONNECT_INDEX);
        CHECK_DEFINE_64X(OBD_CONNECT_MDS);
@@ -652,6 +653,7 @@ check_obd_connect_data(void)
        CHECK_DEFINE_64X(OBD_CONNECT_BULK_MBITS);
        CHECK_DEFINE_64X(OBD_CONNECT_OBDOPACK);
        CHECK_DEFINE_64X(OBD_CONNECT_FLAGS2);
+       BLANK_LINE();
        CHECK_DEFINE_64X(OBD_CONNECT2_FILE_SECCTX);
        CHECK_DEFINE_64X(OBD_CONNECT2_LOCKAHEAD);
        CHECK_DEFINE_64X(OBD_CONNECT2_DIR_MIGRATE);
@@ -680,7 +682,9 @@ check_obd_connect_data(void)
        CHECK_DEFINE_64X(OBD_CONNECT2_ENCRYPT_NAME);
        CHECK_DEFINE_64X(OBD_CONNECT2_DMV_IMP_INHERIT);
        CHECK_DEFINE_64X(OBD_CONNECT2_ENCRYPT_FID2PATH);
+       CHECK_DEFINE_64X(OBD_CONNECT2_REPLAY_CREATE);
 
+       BLANK_LINE();
        CHECK_VALUE_X(OBD_CKSUM_CRC32);
        CHECK_VALUE_X(OBD_CKSUM_ADLER);
        CHECK_VALUE_X(OBD_CKSUM_CRC32C);
index c5dd604..7dfedcc 100644 (file)
@@ -1413,6 +1413,7 @@ void lustre_assert_wire_constants(void)
                 OBD_CONNECT_OBDOPACK);
        LASSERTF(OBD_CONNECT_FLAGS2 == 0x8000000000000000ULL, "found 0x%.16llxULL\n",
                 OBD_CONNECT_FLAGS2);
+
        LASSERTF(OBD_CONNECT2_FILE_SECCTX == 0x1ULL, "found 0x%.16llxULL\n",
                 OBD_CONNECT2_FILE_SECCTX);
        LASSERTF(OBD_CONNECT2_LOCKAHEAD == 0x2ULL, "found 0x%.16llxULL\n",
@@ -1469,6 +1470,9 @@ void lustre_assert_wire_constants(void)
                 OBD_CONNECT2_DMV_IMP_INHERIT);
        LASSERTF(OBD_CONNECT2_ENCRYPT_FID2PATH == 0x40000000ULL, "found 0x%.16llxULL\n",
                 OBD_CONNECT2_ENCRYPT_FID2PATH);
+       LASSERTF(OBD_CONNECT2_REPLAY_CREATE == 0x80000000ULL, "found 0x%.16llxULL\n",
+                OBD_CONNECT2_REPLAY_CREATE);
+
        LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
                (unsigned)OBD_CKSUM_CRC32);
        LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",