Whamcloud - gitweb
LU-1644 mgs: swab nidtbl entries for 2.2 clients
author Jinshan Xiong <jinshan.xiong@intel.com>
Mon, 6 Aug 2012 23:30:24 +0000 (16:30 -0700)
committer Oleg Drokin <green@whamcloud.com>
Tue, 21 Aug 2012 03:27:25 +0000 (23:27 -0400)
The LU-1252 fix was missed in the 2.2 release, so 2.2 clients
always swab nidtbl entries even if the server and client have
the same endianness.  Conversely, 2.3 clients would not swab the
nidtbl entries, but a 2.2 server would always swab them.

To make it work, 2.3 adds a temporary OBD_CONNECT_MNE_SWAB flag,
which will cause the server to swab the nidtbl entries for 2.2
clients, and 2.3 clients will undo the swab done by 2.2 servers
if the MNE_SWAB flag is not set.  This avoids problems with only
checking the client/server version, in case the LU-1252 fix is
ever applied to a 2.2 client or server build.

This workaround is set to auto-expire for 2.6.50+ clients and
servers, which is a reasonable upper limit for interoperability
with old unpatched 2.2 clients in a mixed-endian environment.
This is enough for 2.2.0 clients/servers to work with the 2.5.x
feature releases, after which the OBD_CONNECT flag can be reused.

Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Id316f7e1c7ee2b2c1d1077e8c5dd916edca04d84
Reviewed-on: http://review.whamcloud.com/3548
Tested-by: Hudson
Reviewed-by: Jinshan Xiong <jinshan.xiong@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre/lustre_idl.h
lustre/include/lustre_export.h
lustre/include/lustre_import.h
lustre/ldlm/ldlm_lib.c
lustre/liblustre/llite_lib.c
lustre/mgc/mgc_request.c
lustre/mgs/mgs_nids.c
lustre/obdclass/obd_mount.c
lustre/ptlrpc/import.c

index beeaf4b..8809569 100644 (file)
@@ -1155,11 +1155,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
                                                   * write RPC error properly */
 #define OBD_CONNECT_GRANT_PARAM 0x100000000000ULL/* extra grant params used for
                                                   * finer space reservation */
                                                   * write RPC error properly */
 #define OBD_CONNECT_GRANT_PARAM 0x100000000000ULL/* extra grant params used for
                                                   * finer space reservation */
-#define OBD_CONNECT_NANOSECOND_TIMES 0x200000000000ULL /* nanosec resolution
-                                                       * timestamps supported
-                                                       */
+#define OBD_CONNECT_NANOSEC_TIME 0x200000000000ULL /* nanosecond timestamps */
 #define OBD_CONNECT_LVB_TYPE   0x400000000000ULL /* variable type of LVB */
 #define OBD_CONNECT_LVB_TYPE   0x400000000000ULL /* variable type of LVB */
-
 /* XXX README XXX:
  * Please DO NOT add flag values here before first ensuring that this same
  * flag value is not in use on some other branch.  Please clear any such
 /* XXX README XXX:
  * Please DO NOT add flag values here before first ensuring that this same
  * flag value is not in use on some other branch.  Please clear any such
@@ -1168,6 +1165,12 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
  * and updates obd_connect_names[] for lprocfs_rd_connect_flags(), so it
  * can be approved and landed easily to reserve the flag for future use. */
 
  * and updates obd_connect_names[] for lprocfs_rd_connect_flags(), so it
  * can be approved and landed easily to reserve the flag for future use. */
 
+/* The MNE_SWAB flag is overloading the MDS_MDS bit only for the MGS
+ * connection.  It is a temporary bug fix for Imperative Recovery interop
+ * between 2.2 and 2.3 x86/ppc nodes, and can be removed when interop for
+ * 2.2 clients/servers is no longer needed.  LU-1252/LU-1644. */
+#define OBD_CONNECT_MNE_SWAB            OBD_CONNECT_MDS_MDS
+
 #define OCD_HAS_FLAG(ocd, flg)  \
         (!!((ocd)->ocd_connect_flags & OBD_CONNECT_##flg))
 
 #define OCD_HAS_FLAG(ocd, flg)  \
         (!!((ocd)->ocd_connect_flags & OBD_CONNECT_##flg))
 
@@ -1209,7 +1212,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
                                OBD_CONNECT_EINPROGRESS | OBD_CONNECT_JOBSTATS)
 #define ECHO_CONNECT_SUPPORTED (0)
 #define MGS_CONNECT_SUPPORTED  (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \
                                OBD_CONNECT_EINPROGRESS | OBD_CONNECT_JOBSTATS)
 #define ECHO_CONNECT_SUPPORTED (0)
 #define MGS_CONNECT_SUPPORTED  (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \
-                                OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV)
+                               OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV | \
+                               OBD_CONNECT_MNE_SWAB)
 
 /* Features required for this version of the client to work with server */
 #define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_IBITS | OBD_CONNECT_FID | \
 
 /* Features required for this version of the client to work with server */
 #define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_IBITS | OBD_CONNECT_FID | \
index a0740c7..66a0c6c 100644 (file)
@@ -247,7 +247,10 @@ struct obd_export {
                                   exp_libclient:1, /* liblustre client? */
                                   /* client timed out and tried to reconnect,
                                    * but couldn't because of active rpcs */
                                   exp_libclient:1, /* liblustre client? */
                                   /* client timed out and tried to reconnect,
                                    * but couldn't because of active rpcs */
-                                  exp_abort_active_req:1;
+                                 exp_abort_active_req:1,
+                                 /* Whether to swab nidtbl entries for 2.2
+                                  * clients.  Only used by the MGS to fix
+                                  * LU-1644. */
+                                 exp_need_mne_swab:1;
         /* also protected by exp_lock */
         enum lustre_sec_part      exp_sp_peer;
         struct sptlrpc_flavor     exp_flvr;             /* current */
         /* also protected by exp_lock */
         enum lustre_sec_part      exp_sp_peer;
         struct sptlrpc_flavor     exp_flvr;             /* current */
index f32c9ef..9338a97 100644 (file)
@@ -264,6 +264,7 @@ struct obd_import {
                                   imp_pingable:1,         /* pingable */
                                   imp_resend_replay:1,    /* resend for replay */
                                   imp_no_pinger_recover:1,/* disable normal recovery, for test only. */
                                   imp_pingable:1,         /* pingable */
                                   imp_resend_replay:1,    /* resend for replay */
                                   imp_no_pinger_recover:1,/* disable normal recovery, for test only. */
+                                 imp_need_mne_swab:1,    /* need IR MNE swab */
                                   imp_force_reconnect:1;  /* import must be reconnected instead of chouse new connection */
         __u32                     imp_connect_op;
         struct obd_connect_data   imp_connect_data;
                                   imp_force_reconnect:1;  /* import must be reconnected instead of chouse new connection */
         __u32                     imp_connect_op;
         struct obd_connect_data   imp_connect_data;
index f1d7877..c8c0697 100644 (file)
@@ -747,7 +747,8 @@ int target_handle_connect(struct ptlrpc_request *req)
         struct obd_connect_data *data, *tmpdata;
         int size, tmpsize;
         lnet_nid_t *client_nid = NULL;
         struct obd_connect_data *data, *tmpdata;
         int size, tmpsize;
         lnet_nid_t *client_nid = NULL;
-        ENTRY;
+       bool mne_swab_client_ver;
+       ENTRY;
 
         OBD_RACE(OBD_FAIL_TGT_CONN_RACE);
 
 
         OBD_RACE(OBD_FAIL_TGT_CONN_RACE);
 
@@ -838,6 +839,15 @@ int target_handle_connect(struct ptlrpc_request *req)
         if (rc)
                 GOTO(out, rc);
 
         if (rc)
                 GOTO(out, rc);
 
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 6, 50, 0)
+       /* Check if the client might be missing the LU-1252 fix to swab
+        * the IR mne_length entries. Do this as early as possible in case
+        * the version code is modified. See LU-1644 for details. */
+       mne_swab_client_ver = OBD_OCD_VERSION_MAJOR(data->ocd_version) == 2 &&
+                             OBD_OCD_VERSION_MINOR(data->ocd_version) == 2 &&
+                             OBD_OCD_VERSION_PATCH(data->ocd_version) < 55;
+#endif
+
         if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) {
                 if (!data) {
                         DEBUG_REQ(D_WARNING, req, "Refusing old (unversioned) "
         if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) {
                 if (!data) {
                         DEBUG_REQ(D_WARNING, req, "Refusing old (unversioned) "
@@ -1032,6 +1042,21 @@ dont_check_exports:
         if (rc)
                 GOTO(out, rc);
 
         if (rc)
                 GOTO(out, rc);
 
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 6, 50, 0)
+       /* 2.2.0 clients always swab nidtbl entries due to a bug, so the server
+        * will do the swabbing for them if the client is using the same
+        * endianness.
+        *
+        * This fixup is version-limited, because we don't want to carry the
+        * OBD_CONNECT_MNE_SWAB flag around forever, just so long as we need
+        * interop with unpatched 2.2 clients.  For newer clients, servers
+        * will never do MNE swabbing, let the client handle that.  LU-1644 */
+       export->exp_need_mne_swab =
+               !(data->ocd_connect_flags & OBD_CONNECT_MNE_SWAB) &&
+               mne_swab_client_ver && !ptlrpc_req_need_swab(req);
+#else
+#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and exp_need_mne_swab"
+#endif
+
         LASSERT(target->u.obt.obt_magic == OBT_MAGIC);
         data->ocd_instance = target->u.obt.obt_instance;
 
         LASSERT(target->u.obt.obt_magic == OBT_MAGIC);
         data->ocd_instance = target->u.obt.obt_instance;
 
index 8a10004..df524f1 100644 (file)
@@ -159,12 +159,8 @@ int liblustre_process_log(struct config_llog_instance *cfg,
         if (ocd == NULL)
                 GOTO(out_cleanup, rc = -ENOMEM);
 
         if (ocd == NULL)
                 GOTO(out_cleanup, rc = -ENOMEM);
 
-        ocd->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_FID |
-                                 OBD_CONNECT_AT | OBD_CONNECT_VBR |
-                                 OBD_CONNECT_FULL20;
-#ifdef LIBLUSTRE_POSIX_ACL
-        ocd->ocd_connect_flags |= OBD_CONNECT_ACL;
-#endif
+       ocd->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
+                                OBD_CONNECT_FULL20;
         ocd->ocd_version = LUSTRE_VERSION_CODE;
 
         rc = obd_connect(NULL, &exp, obd, &mgc_uuid, ocd, NULL);
         ocd->ocd_version = LUSTRE_VERSION_CODE;
 
         rc = obd_connect(NULL, &exp, obd, &mgc_uuid, ocd, NULL);
index 96ef54d..d489bc6 100644 (file)
@@ -1229,9 +1229,9 @@ enum {
 };
 
 static int mgc_apply_recover_logs(struct obd_device *mgc,
 };
 
 static int mgc_apply_recover_logs(struct obd_device *mgc,
-                                  struct config_llog_data *cld,
-                                  __u64 max_version,
-                                  void *data, int datalen, int need_swab)
+                                 struct config_llog_data *cld,
+                                 __u64 max_version,
+                                 void *data, int datalen, bool mne_swab)
 {
         struct config_llog_instance *cfg = &cld->cld_cfg;
         struct lustre_sb_info       *lsi = s2lsi(cfg->cfg_sb);
 {
         struct config_llog_instance *cfg = &cld->cld_cfg;
         struct lustre_sb_info       *lsi = s2lsi(cfg->cfg_sb);
@@ -1292,11 +1292,16 @@ static int mgc_apply_recover_logs(struct obd_device *mgc,
                 if (datalen < entry_len) /* must have entry_len at least */
                         break;
 
                 if (datalen < entry_len) /* must have entry_len at least */
                         break;
 
-                if (need_swab)
-                        lustre_swab_mgs_nidtbl_entry(entry);
-                LASSERT(entry->mne_length <= CFS_PAGE_SIZE);
-                if (entry->mne_length < entry_len)
-                        break;
+               /* Keep this swab for normal mixed endian handling. LU-1644 */
+               if (mne_swab)
+                       lustre_swab_mgs_nidtbl_entry(entry);
+               if (entry->mne_length > CFS_PAGE_SIZE) {
+                       CERROR("MNE too large (%u)\n", entry->mne_length);
+                       break;
+               }
+
+               if (entry->mne_length < entry_len)
+                       break;
 
                 off     += entry->mne_length;
                 datalen -= entry->mne_length;
 
                 off     += entry->mne_length;
                 datalen -= entry->mne_length;
@@ -1418,6 +1423,7 @@ static int mgc_process_recover_log(struct obd_device *obd,
         cfs_page_t **pages;
         int nrpages;
         bool eof = true;
         cfs_page_t **pages;
         int nrpages;
         bool eof = true;
+       bool mne_swab = false;
         int i;
         int ealen;
         int rc;
         int i;
         int ealen;
         int rc;
@@ -1504,14 +1510,24 @@ again:
                 GOTO(out, rc);
         }
 
                 GOTO(out, rc);
         }
 
+       mne_swab = !!ptlrpc_rep_need_swab(req);
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 6, 50, 0)
+       /* This import flag means the server did an extra swab of IR MNE
+        * records (fixed in LU-1252), reverse it here if needed. LU-1644 */
+       if (unlikely(req->rq_import->imp_need_mne_swab))
+               mne_swab = !mne_swab;
+#else
+#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and exp_need_mne_swab"
+#endif
+
         for (i = 0; i < nrpages && ealen > 0; i++) {
                 int rc2;
                 void *ptr;
 
                 ptr = cfs_kmap(pages[i]);
                 rc2 = mgc_apply_recover_logs(obd, cld, res->mcr_offset, ptr,
         for (i = 0; i < nrpages && ealen > 0; i++) {
                 int rc2;
                 void *ptr;
 
                 ptr = cfs_kmap(pages[i]);
                 rc2 = mgc_apply_recover_logs(obd, cld, res->mcr_offset, ptr,
-                                             min_t(int, ealen, CFS_PAGE_SIZE),
-                                             ptlrpc_rep_need_swab(req));
+                                            min_t(int, ealen, CFS_PAGE_SIZE),
+                                            mne_swab);
                 cfs_kunmap(pages[i]);
                 if (rc2 < 0) {
                         CWARN("Process recover log %s error %d\n",
                 cfs_kunmap(pages[i]);
                 if (rc2 < 0) {
                         CWARN("Process recover log %s error %d\n",
index 5399e47..a9a9769 100644 (file)
@@ -86,7 +86,7 @@ static int nidtbl_is_sane(struct mgs_nidtbl *tbl)
  * nidtbl entries will be packed in @pages by @unit_size units - entries
  * shouldn't cross unit boundaries.
  */
  * nidtbl entries will be packed in @pages by @unit_size units - entries
  * shouldn't cross unit boundaries.
  */
-static int mgs_nidtbl_read(struct obd_device *unused, struct mgs_nidtbl *tbl,
+static int mgs_nidtbl_read(struct obd_export *exp, struct mgs_nidtbl *tbl,
                            struct mgs_config_res *res, cfs_page_t **pages,
                            int nrpages, int units_total, int unit_size)
 {
                            struct mgs_config_res *res, cfs_page_t **pages,
                            int nrpages, int units_total, int unit_size)
 {
@@ -146,7 +146,17 @@ static int mgs_nidtbl_read(struct obd_device *unused, struct mgs_nidtbl *tbl,
 
                         /* check if we need to consume remaining bytes. */
                         if (last_in_unit != NULL && bytes_in_unit) {
 
                         /* check if we need to consume remaining bytes. */
                         if (last_in_unit != NULL && bytes_in_unit) {
-                                last_in_unit->mne_length += bytes_in_unit;
+#
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 6, 50, 0)
+                               /* May need to swab back to update the length.*/
+                               if (exp->exp_need_mne_swab)
+                                       lustre_swab_mgs_nidtbl_entry(last_in_unit);
+#endif
+                               last_in_unit->mne_length += bytes_in_unit;
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 6, 50, 0)
+                               if (exp->exp_need_mne_swab)
+                                       lustre_swab_mgs_nidtbl_entry(last_in_unit);
+#endif
                                 rc  += bytes_in_unit;
                                 buf += bytes_in_unit;
                                 last_in_unit = NULL;
                                 rc  += bytes_in_unit;
                                 buf += bytes_in_unit;
                                 last_in_unit = NULL;
@@ -193,6 +203,12 @@ static int mgs_nidtbl_read(struct obd_device *unused, struct mgs_nidtbl *tbl,
                 memcpy(entry->u.nids, mti->mti_nids,
                        mti->mti_nid_count * sizeof(lnet_nid_t));
 
                 memcpy(entry->u.nids, mti->mti_nids,
                        mti->mti_nid_count * sizeof(lnet_nid_t));
 
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 6, 50, 0)
+               /* For LU-1644, swab entry for 2.2 clients. */
+               if (exp->exp_need_mne_swab)
+                       lustre_swab_mgs_nidtbl_entry(entry);
+#endif
+
                 version = tgt->mnt_version;
                 rc     += entry_len;
                 buf    += entry_len;
                 version = tgt->mnt_version;
                 rc     += entry_len;
                 buf    += entry_len;
@@ -634,8 +650,8 @@ int mgs_get_ir_logs(struct ptlrpc_request *req)
 
         res->mcr_offset = body->mcb_offset;
         unit_size = min_t(int, 1 << body->mcb_bits, CFS_PAGE_SIZE);
 
         res->mcr_offset = body->mcb_offset;
         unit_size = min_t(int, 1 << body->mcb_bits, CFS_PAGE_SIZE);
-        bytes = mgs_nidtbl_read(obd, &fsdb->fsdb_nidtbl, res, pages, nrpages,
-                                bufsize / unit_size, unit_size);
+       bytes = mgs_nidtbl_read(req->rq_export, &fsdb->fsdb_nidtbl, res,
+                               pages, nrpages, bufsize / unit_size, unit_size);
         if (bytes < 0)
                 GOTO(out, rc = bytes);
 
         if (bytes < 0)
                 GOTO(out, rc = bytes);
 
index 26fc3f6..1335646 100644 (file)
@@ -851,10 +851,11 @@ static int lustre_start_mgc(struct super_block *sb)
         if (rc)
                 /* nonfatal */
                 CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
         if (rc)
                 /* nonfatal */
                 CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
-        /* We connect to the MGS at setup, and don't disconnect until cleanup */
-        data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_FID |
-                                  OBD_CONNECT_AT | OBD_CONNECT_FULL20   |
-                                  OBD_CONNECT_IMP_RECOV;
+
+       /* We connect to the MGS at setup, and don't disconnect until cleanup */
+       data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
+                                 OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
+                                 OBD_CONNECT_MNE_SWAB;
         if (lmd_is_client(lsi->lsi_lmd) &&
             lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
                 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
         if (lmd_is_client(lsi->lsi_lmd) &&
             lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
                 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
index 5df9591..d2e4379 100644 (file)
@@ -1028,6 +1028,27 @@ finish:
                                       newer : older, LUSTRE_VERSION_STRING);
                 }
 
                                       newer : older, LUSTRE_VERSION_STRING);
                 }
 
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 6, 50, 0)
+               /* Check if server has LU-1252 fix applied to not always swab
+                * the IR MNE entries. Do this only once per connection.  This
+                * fixup is version-limited, because we don't want to carry the
+                * OBD_CONNECT_MNE_SWAB flag around forever, just so long as we
+                * need interop with unpatched 2.2 servers.  For newer servers,
+                * the client will do MNE swabbing only as needed.  LU-1644 */
+               if (unlikely((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+                            !(ocd->ocd_connect_flags & OBD_CONNECT_MNE_SWAB) &&
+                            OBD_OCD_VERSION_MAJOR(ocd->ocd_version) == 2 &&
+                            OBD_OCD_VERSION_MINOR(ocd->ocd_version) == 2 &&
+                            OBD_OCD_VERSION_PATCH(ocd->ocd_version) < 55 &&
+                            strcmp(imp->imp_obd->obd_type->typ_name,
+                                   LUSTRE_MGC_NAME) == 0))
+                       imp->imp_need_mne_swab = 1;
+               else /* clear if server was upgraded since last connect */
+                       imp->imp_need_mne_swab = 0;
+#else
+#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and exp_need_mne_swab"
+#endif
+
                if (ocd->ocd_connect_flags & OBD_CONNECT_CKSUM) {
                        /* We sent to the server ocd_cksum_types with bits set
                         * for algorithms we understand. The server masked off
                if (ocd->ocd_connect_flags & OBD_CONNECT_CKSUM) {
                        /* We sent to the server ocd_cksum_types with bits set
                         * for algorithms we understand. The server masked off