Whamcloud - gitweb
LU-13306 mgs: use large NIDS in the nid table on the MGS 96/50896/32
authorJames Simmons <jsimmons@infradead.org>
Wed, 19 Jul 2023 18:04:15 +0000 (14:04 -0400)
committerOleg Drokin <green@whamcloud.com>
Sat, 19 Aug 2023 05:34:31 +0000 (05:34 +0000)
On the MGS the NIDs detected are handled using the struct
mgs_target_info which currently only handles lnet_nid_t.
This structure also limits the number of NIDs to 32 entries.
Some sites have reported that 32 NIDs weren't enough when
they configured virtual LNet networks for isolation.

Update the mgs_target_info to use NID strings instead.
This has the advantage of working even if struct lnet_nid
expands in the future. We place this data at the end of
the mgs_target_info as a flexible array. This requires
updating the ptlrpc packet handling to increase the size
to some new value to contain all the NIDs registered.
Also this gives us the option to use hostnames in the
future. This information is then fed into a
struct mgs_nidtbl_entry which is sent to the mgc on all
the remote nodes. With this patch only large-format NIDs that
fit in the small address space are translated to the original
lnet_nid_t format and sent to the various clients.
All the server targets, which are clients of the MGS,
use the large NID format. With this patch we don't
have to patch old clients when the servers are using
the larger NID format.

Expand LNetGetId() to return large NID addresses as well.
In the future we will use the ocd_connect_flags to
determine if the MGS supports large NID addresses.

Change-Id: I7083d6ecfc46cf0419a0d4a582e4bf5240f193cd
Signed-off-by: James Simmons <jsimmons@infradead.org>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50896
Tested-by: Maloo <maloo@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Neil Brown <neilb@suse.de>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
18 files changed:
lnet/include/lnet/api.h
lnet/lnet/api-ni.c
lnet/selftest/console.c
lustre/include/lustre_disk.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/llite/llite_lib.c
lustre/lmv/lmv_obd.c
lustre/mgc/mgc_request_server.c
lustre/mgs/mgs_handler.c
lustre/mgs/mgs_llog.c
lustre/mgs/mgs_nids.c
lustre/obdclass/obd_mount.c
lustre/ptlrpc/nodemap_handler.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/wiretest.c
lustre/target/tgt_mount.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index 573e08d..2d15089 100644 (file)
@@ -74,7 +74,7 @@ int LNetNIFini(void);
  *
  * \see LNetMEAttach
  * @{ */
-int LNetGetId(unsigned int index, struct lnet_processid *id);
+int LNetGetId(unsigned int index, struct lnet_processid *id, bool large_nids);
 int LNetDist(struct lnet_nid *nid, struct lnet_nid *srcnid, __u32 *order);
 void LNetPrimaryNID(struct lnet_nid *nid);
 bool LNetIsPeerLocal(struct lnet_nid *nid);
index 15c60fa..95dfbf7 100644 (file)
@@ -4105,7 +4105,7 @@ LNetCtl(unsigned int cmd, void *arg)
        case IOC_LIBCFS_GET_NI: {
                struct lnet_processid id = {};
 
-               rc = LNetGetId(data->ioc_count, &id);
+               rc = LNetGetId(data->ioc_count, &id, false);
                data->ioc_nid = lnet_nid_to_nid4(&id.nid);
                return rc;
        }
@@ -6259,15 +6259,16 @@ EXPORT_SYMBOL(LNetIsPeerLocal);
  * Retrieve the struct lnet_process_id ID of LNet interface at \a index.
  * Note that all interfaces share a same PID, as requested by LNetNIInit().
  *
- * \param index Index of the interface to look up.
- * \param id On successful return, this location will hold the
- * struct lnet_process_id ID of the interface.
+ * @index      Index of the interface to look up.
+ * @id         On successful return, this location will hold the
+ *             struct lnet_process_id ID of the interface.
+ * @large_nids Report large NIDs if this is true.
  *
- * \retval 0 If an interface exists at \a index.
- * \retval -ENOENT If no interface has been found.
+ * RETURN      0 If an interface exists at \a index.
+ *             -ENOENT If no interface has been found.
  */
 int
-LNetGetId(unsigned int index, struct lnet_processid *id)
+LNetGetId(unsigned int index, struct lnet_processid *id, bool large_nids)
 {
        struct lnet_ni   *ni;
        struct lnet_net  *net;
@@ -6280,9 +6281,9 @@ LNetGetId(unsigned int index, struct lnet_processid *id)
 
        list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
                list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
-                       if (!nid_is_nid4(&ni->ni_nid))
-                               /* FIXME this needs to be handled */
+                       if (!large_nids && !nid_is_nid4(&ni->ni_nid))
                                continue;
+
                        if (index-- != 0)
                                continue;
 
index 50ee1dd..03d55c5 100644 (file)
@@ -1681,7 +1681,7 @@ lstcon_new_session_id(struct lst_session_id *sid)
 
        LASSERT(console_session.ses_state == LST_SESSION_NONE);
 
-       LNetGetId(1, &id);
+       LNetGetId(1, &id, true);
        sid->ses_nid = id.nid;
        sid->ses_stamp = div_u64(ktime_get_ns(), NSEC_PER_MSEC);
 }
index d17b859..39f9864 100644 (file)
@@ -184,11 +184,26 @@ struct lustre_sb_info {
 #define     get_profile_name(sb)   (s2lsi(sb)->lsi_lmd->lmd_profile)
 #define     get_mount_fileset(sb)  (s2lsi(sb)->lsi_lmd->lmd_fileset)
 
+/* opc for target register, see also uapi/linux/lustre/lustre_disk.h.
+ * For mti_flags the lower 16 bits are used for mount options so these
+ * have to be masked out with LDD_F_MASK. Otherwise these values will
+ * be seen as unsupported mount options. Bit 16 is already used by
+ * LDD_F_NO_LOCAL_LOGS so 17 is next free bit.
+ */
+enum ldd_target_flags {
+       LDD_F_LARGE_NID         = BIT(17),      /* 0x20000 */
+};
+
+static inline bool target_supports_large_nid(struct mgs_target_info *mti)
+{
+       return mti->mti_flags & LDD_F_LARGE_NID;
+}
+
 # ifdef HAVE_SERVER_SUPPORT
 /* opc for target register */
-#define LDD_F_OPC_REG   0x10000000
-#define LDD_F_OPC_UNREG 0x20000000
-#define LDD_F_OPC_READY 0x40000000
+#define LDD_F_OPC_REG   0x10000000     /* bit 28 */
+#define LDD_F_OPC_UNREG 0x20000000     /* bit 29 */
+#define LDD_F_OPC_READY 0x40000000     /* bit 30 */
 #define LDD_F_OPC_MASK  0xf0000000
 
 #define LDD_F_MASK     0xFFFF
index 6912a51..f69b0e3 100644 (file)
@@ -70,6 +70,7 @@
 #include <linux/errno.h>
 #include <linux/fiemap.h>
 #include <linux/types.h>
+#include <linux/lnet/nidstr.h>
 #include <linux/lnet/lnet-types.h>
 #include <linux/lustre/lustre_user.h>
 #include <linux/lustre/lustre_ver.h>
@@ -953,7 +954,8 @@ struct ptlrpc_body_v2 {
                                OBD_CONNECT_BULK_MBITS | OBD_CONNECT_BARRIER | \
                                OBD_CONNECT_FLAGS2)
 
-#define MGS_CONNECT_SUPPORTED2 OBD_CONNECT2_REP_MBITS
+#define MGS_CONNECT_SUPPORTED2 (OBD_CONNECT2_REP_MBITS | \
+                               OBD_CONNECT2_LARGE_NID)
 
 /* Features required for this version of the client to work with server */
 #define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_FID |     \
@@ -2613,6 +2615,7 @@ struct mgs_send_param {
 #define MTI_NAME_MAXLEN  64
 #define MTI_PARAM_MAXLEN 4096
 #define MTI_NIDS_MAX     32
+
 struct mgs_target_info {
        __u32           mti_lustre_ver;
        __u32           mti_stripe_index;
@@ -2625,7 +2628,8 @@ struct mgs_target_info {
        char            mti_uuid[sizeof(struct obd_uuid)];
        __u64           mti_nids[MTI_NIDS_MAX]; /* host nids (lnet_nid_t) */
        char            mti_params[MTI_PARAM_MAXLEN];
-};
+       char            mti_nidlist[][LNET_NIDSTR_SIZE];
+} __attribute__((packed));
 
 struct mgs_nidtbl_entry {
         __u64           mne_version;    /* table version of this entry */
index 8c289bb..eba1cf4 100644 (file)
@@ -4042,7 +4042,7 @@ void ll_compute_rootsquash_state(struct ll_sb_info *sbi)
                 * in the nosquash_nids list */
                matched = false;
                i = 0;
-               while (LNetGetId(i++, &id) != -ENOENT) {
+               while (LNetGetId(i++, &id, false) != -ENOENT) {
                        if (nid_is_lo0(&id.nid))
                                continue;
                        if (cfs_match_nid(&id.nid,
index 7336d09..7f68caf 100644 (file)
@@ -1186,7 +1186,7 @@ static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         * initialize rr_index to lower 32bit of netid, so that client
         * can distribute subdirs evenly from the beginning.
         */
-       while (LNetGetId(i++, &lnet_id) != -ENOENT) {
+       while (LNetGetId(i++, &lnet_id, false) != -ENOENT) {
                if (!nid_is_lo0(&lnet_id.nid)) {
                        lmv->lmv_qos_rr_index = ntohl(lnet_id.nid.nid_addr[0]);
                        break;
@@ -1277,7 +1277,7 @@ static int lmv_select_statfs_mdt(struct lmv_obd *lmv, __u32 flags)
        /* choose initial MDT for this client */
        for (i = 0;; i++) {
                struct lnet_processid lnet_id;
-               if (LNetGetId(i, &lnet_id) == -ENOENT)
+               if (LNetGetId(i, &lnet_id, false) == -ENOENT)
                        break;
 
                if (!nid_is_lo0(&lnet_id.nid)) {
index c03541c..15b5fa6 100644 (file)
@@ -180,24 +180,40 @@ static int mgc_fs_cleanup(const struct lu_env *env, struct obd_device *obd)
 static int mgc_target_register(struct obd_export *exp,
                               struct mgs_target_info *mti)
 {
+       size_t mti_len = offsetof(struct mgs_target_info, mti_nidlist);
        struct ptlrpc_request *req;
        struct mgs_target_info *req_mti, *rep_mti;
        int rc;
 
        ENTRY;
-       req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
-                                       &RQF_MGS_TARGET_REG, LUSTRE_MGS_VERSION,
-                                       MGS_TARGET_REG);
+       req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MGS_TARGET_REG);
        if (!req)
                RETURN(-ENOMEM);
 
+       server_mti_print("mgc_target_register: req", mti);
+       if (target_supports_large_nid(mti)) {
+               mti_len += mti->mti_nid_count * LNET_NIDSTR_SIZE;
+
+               req_capsule_set_size(&req->rq_pill, &RMF_MGS_TARGET_INFO,
+                                    RCL_CLIENT, mti_len);
+
+               req_capsule_set_size(&req->rq_pill, &RMF_MGS_TARGET_INFO,
+                                    RCL_SERVER, mti_len);
+       }
+
+       rc = ptlrpc_request_pack(req, LUSTRE_MGS_VERSION, MGS_TARGET_REG);
+       if (rc < 0) {
+               ptlrpc_request_free(req);
+               RETURN(rc);
+       }
+
        req_mti = req_capsule_client_get(&req->rq_pill, &RMF_MGS_TARGET_INFO);
        if (!req_mti) {
                ptlrpc_req_finished(req);
                RETURN(-ENOMEM);
        }
 
-       memcpy(req_mti, mti, sizeof(*req_mti));
+       memcpy(req_mti, mti, mti_len);
        ptlrpc_request_set_replen(req);
        CDEBUG(D_MGC, "register %s\n", mti->mti_svname);
        /* Limit how long we will wait for the enqueue to complete */
@@ -215,12 +231,18 @@ static int mgc_target_register(struct obd_export *exp,
        if (ptlrpc_client_replied(req)) {
                rep_mti = req_capsule_server_get(&req->rq_pill,
                                                 &RMF_MGS_TARGET_INFO);
-               if (rep_mti)
-                       memcpy(mti, rep_mti, sizeof(*rep_mti));
+               if (rep_mti) {
+                       mti_len = offsetof(struct mgs_target_info, mti_nidlist);
+
+                       if (target_supports_large_nid(mti))
+                               mti_len += mti->mti_nid_count * LNET_NIDSTR_SIZE;
+                       memcpy(mti, rep_mti, mti_len);
+               }
        }
        if (!rc) {
                CDEBUG(D_MGC, "register %s got index = %d\n",
                       mti->mti_svname, mti->mti_stripe_index);
+               server_mti_print("mgc_target_register: rep", mti);
        }
        ptlrpc_req_finished(req);
 
@@ -238,11 +260,15 @@ int mgc_set_info_async_server(const struct lu_env *env,
        ENTRY;
        /* FIXME move this to mgc_process_config */
        if (KEY_IS(KEY_REGISTER_TARGET)) {
-               struct mgs_target_info *mti;
+               size_t mti_len = offsetof(struct mgs_target_info, mti_nidlist);
+               struct mgs_target_info *mti = val;
+
+               if (target_supports_large_nid(mti))
+                       mti_len += mti->mti_nid_count * LNET_NIDSTR_SIZE;
 
-               if (vallen != sizeof(struct mgs_target_info))
+               if (vallen != mti_len)
                        RETURN(-EINVAL);
-               mti = (struct mgs_target_info *)val;
+
                CDEBUG(D_MGC, "register_target %s %#x\n",
                       mti->mti_svname, mti->mti_flags);
                rc =  mgc_target_register(exp, mti);
index 650965b..e8452e0 100644 (file)
@@ -317,8 +317,19 @@ static int mgs_check_failover_reg(struct mgs_target_info *mti)
                while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
                        for (i = 0; i < mti->mti_nid_count; i++) {
                                struct lnet_nid nid2;
+                               int rc;
+
+                               if (target_supports_large_nid(mti)) {
+                                       rc = libcfs_strnid(&nid2, mti->mti_nidlist[i]);
+                                       if (rc < 0) {
+                                               LCONSOLE_WARN("NID %s is unsupported type or improper format\n",
+                                                             mti->mti_nidlist[i]);
+                                               return rc;
+                                       }
+                               } else {
+                                       lnet_nid4_to_nid(mti->mti_nids[i], &nid2);
+                               }
 
-                               lnet_nid4_to_nid(mti->mti_nids[i], &nid2);
                                if (nid_same(&nid, &nid2)) {
                                        LCONSOLE_WARN("Denying initial registration attempt from nid %s, specified as failover\n",
                                                      libcfs_nidstr(&nid));
@@ -339,11 +350,11 @@ static int mgs_target_reg(struct tgt_session_info *tsi)
        struct fs_db *b_fsdb = NULL; /* barrier fsdb */
        struct fs_db *c_fsdb = NULL; /* config fsdb */
        char barrier_name[20];
+       size_t mti_len = 0;
        int opc;
        int rc = 0;
 
        ENTRY;
-
        rc = lu_env_refill((struct lu_env *)tsi->tsi_env);
        if (rc)
                return err_serious(rc);
@@ -542,8 +553,21 @@ out_norevoke:
                mti->mti_flags |= LDD_F_ERROR;
 
        /* send back the whole mti in the reply */
+       if (target_supports_large_nid(mti)) {
+               size_t len = offsetof(struct mgs_target_info, mti_nidlist);
+               int err;
+
+               mti_len = mti->mti_nid_count * LNET_NIDSTR_SIZE;
+               err = req_capsule_server_grow(tsi->tsi_pill,
+                                             &RMF_MGS_TARGET_INFO,
+                                             len + mti_len);
+               if (err < 0)
+                       RETURN(err);
+       }
        rep_mti = req_capsule_server_get(tsi->tsi_pill, &RMF_MGS_TARGET_INFO);
        *rep_mti = *mti;
+       if (target_supports_large_nid(mti))
+               memcpy(rep_mti->mti_nidlist, mti->mti_nidlist, mti_len);
 
        /* Flush logs to disk */
        dt_sync(tsi->tsi_env, mgs->mgs_bottom);
index e334dd0..9550c1b 100644 (file)
@@ -2183,6 +2183,7 @@ static int mgs_steal_client_llog_handler(const struct lu_env *env,
                        last_step = -1;
                        got_an_osc_or_mdc = 0;
                        memset(tmti, 0, sizeof(*tmti));
+                       tmti->mti_flags = mti->mti_flags;
                        rc = record_start_log(env, mgs, &mdt_llh,
                                              mti->mti_svname);
                        if (rc)
@@ -2209,6 +2210,7 @@ static int mgs_steal_client_llog_handler(const struct lu_env *env,
                        last_step = -1;
                        got_an_osc_or_mdc = 0;
                        memset(tmti, 0, sizeof(*tmti));
+                       tmti->mti_flags = mti->mti_flags;
                        RETURN(rc);
                }
        }
@@ -2217,30 +2219,53 @@ static int mgs_steal_client_llog_handler(const struct lu_env *env,
                RETURN(rc);
 
        if (lcfg->lcfg_command == LCFG_ADD_UUID) {
-               __u64 nodenid = lcfg->lcfg_nid;
-               if (!nodenid) {
-                       char *nidstr = lustre_cfg_buf(lcfg, 2);
+               lnet_nid_t nodenid = lcfg->lcfg_nid;
+               char *nidstr = NULL;
 
-                       if (nidstr) {
-                               struct lnet_nid nid;
+               if (!nodenid) {
+                       nidstr = lustre_cfg_buf(lcfg, 2);
 
-                               if (libcfs_strnid(&nid, nidstr) == 0 &&
-                                   nid_is_nid4(&nid))
-                                       nodenid = lnet_nid_to_nid4(&nid);
-                       }
+                       if (!nidstr)
+                               RETURN(-ENODEV);
                }
 
                if (strlen(tmti->mti_uuid) == 0) {
+                       char *dst = NULL;
+
+                       if (target_supports_large_nid(mti))
+                               dst = tmti->mti_nidlist[tmti->mti_nid_count];
+
                        /* target uuid not set, this config record is before
                         * LCFG_SETUP, this nid is one of target node nid.
                         */
-                       tmti->mti_nids[tmti->mti_nid_count] = nodenid;
+                       if (nidstr) {
+                               if (dst) {
+                                       rc = strscpy(dst, nidstr,
+                                                    LNET_NIDSTR_SIZE);
+                                       if (rc < 0)
+                                               RETURN(rc);
+                               } else {
+                                       tmti->mti_nids[tmti->mti_nid_count] =
+                                               libcfs_str2nid(nidstr);
+                               }
+                       } else {
+                               if (dst)
+                                       libcfs_nid2str_r(nodenid, dst,
+                                                        LNET_NIDSTR_SIZE);
+                               else
+                                       tmti->mti_nids[tmti->mti_nid_count] =
+                                               nodenid;
+                       }
                        tmti->mti_nid_count++;
                } else {
-                       char nidstr[LNET_NIDSTR_SIZE];
+                       char tmp[LNET_NIDSTR_SIZE];
 
+                       if (!nidstr) {
+                               libcfs_nid2str_r(nodenid, tmp,
+                                                LNET_NIDSTR_SIZE);
+                               nidstr = tmp;
+                       }
                        /* failover node nid */
-                       libcfs_nid2str_r(nodenid, nidstr, sizeof(nidstr));
                        rc = add_param(tmti->mti_params, PARAM_FAILNODE,
                                       nidstr);
                }
@@ -2311,13 +2336,14 @@ static int mgs_steal_client_llog_handler(const struct lu_env *env,
        RETURN(rc);
 }
 
-/* fsdb->fsdb_mutex is already held  in mgs_write_log_target*/
-/* stealed from mgs_get_fsdb_from_llog*/
+/* fsdb->fsdb_mutex is already held in mgs_write_log_target */
+/* stealed from mgs_get_fsdb_from_llog */
 static int mgs_steal_llog_for_mdt_from_client(const struct lu_env *env,
                                              struct mgs_device *mgs,
                                              char *client_name,
                                              struct temp_comp *comp)
 {
+       size_t mti_len = offsetof(struct mgs_target_info, mti_nidlist);
        struct llog_handle *loghandle;
        struct mgs_target_info *tmti;
        struct llog_ctxt *ctxt;
@@ -2328,10 +2354,17 @@ static int mgs_steal_llog_for_mdt_from_client(const struct lu_env *env,
        ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
        LASSERT(ctxt != NULL);
 
-       OBD_ALLOC_PTR(tmti);
-       if (tmti == NULL)
+       /* Create the mti for the osp registered by mgc_write_log_osp_to_mdt().
+        * The function mgs_steal_client_llog_handle() will fill in the rest.
+        */
+       if (target_supports_large_nid(comp->comp_mti))
+               mti_len += comp->comp_mti->mti_nid_count * LNET_NIDSTR_SIZE;
+
+       OBD_ALLOC(tmti, mti_len);
+       if (!tmti)
                GOTO(out_ctxt, rc = -ENOMEM);
 
+       tmti->mti_flags = comp->comp_mti->mti_flags;
        comp->comp_tmti = tmti;
        comp->comp_obd = mgs->mgs_obd;
 
@@ -2353,7 +2386,7 @@ static int mgs_steal_llog_for_mdt_from_client(const struct lu_env *env,
 out_close:
        llog_close(env, loghandle);
 out_pop:
-       OBD_FREE_PTR(tmti);
+       OBD_FREE(tmti, mti_len);
 out_ctxt:
        llog_ctxt_put(ctxt);
        RETURN(rc);
@@ -2568,16 +2601,16 @@ static int mgs_write_log_mdc_to_lmv(const struct lu_env *env,
                                    struct mgs_target_info *mti,
                                    char *logname, char *lmvname)
 {
+       char tmp[LNET_NIDSTR_SIZE], *nidstr;
        struct llog_handle *llh = NULL;
        char *mdcname = NULL;
        char *nodeuuid = NULL;
        char *mdcuuid = NULL;
        char *lmvuuid = NULL;
        char index[6];
-       char nidstr[LNET_NIDSTR_SIZE];
        int i, rc;
-       ENTRY;
 
+       ENTRY;
        if (mgs_log_is_empty(env, mgs, logname)) {
                CERROR("log is empty! Logical error\n");
                RETURN(-EINVAL);
@@ -2586,7 +2619,13 @@ static int mgs_write_log_mdc_to_lmv(const struct lu_env *env,
        CDEBUG(D_MGS, "adding mdc for %s to log %s:lmv(%s)\n",
               mti->mti_svname, logname, lmvname);
 
-       libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr));
+       if (!target_supports_large_nid(mti)) {
+               libcfs_nid2str_r(mti->mti_nids[0], tmp, sizeof(tmp));
+               nidstr = tmp;
+       } else {
+               nidstr = mti->mti_nidlist[0];
+       }
+
        rc = name_create(&nodeuuid, nidstr, "");
        if (rc)
                RETURN(rc);
@@ -2612,13 +2651,19 @@ static int mgs_write_log_mdc_to_lmv(const struct lu_env *env,
                           "add mdc");
        if (rc)
                GOTO(out_end, rc);
+
        for (i = 0; i < mti->mti_nid_count; i++) {
                struct lnet_nid nid;
 
-               lnet_nid4_to_nid(mti->mti_nids[i], &nid);
-               CDEBUG(D_MGS, "add nid %s for mdt\n",
-                      libcfs_nidstr_r(&nid, nidstr, sizeof(nidstr)));
+               if (target_supports_large_nid(mti)) {
+                       rc = libcfs_strnid(&nid, mti->mti_nidlist[i]);
+                       if (rc < 0)
+                               GOTO(out_end, rc);
+               } else {
+                       lnet_nid4_to_nid(mti->mti_nids[i], &nid);
+               }
 
+               CDEBUG(D_MGS, "add nid %s for mdt\n", libcfs_nidstr(&nid));
                rc = record_add_uuid(env, llh, &nid, nodeuuid);
                if (rc)
                        GOTO(out_end, rc);
@@ -2702,6 +2747,7 @@ static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
                                    struct mgs_target_info *mti,
                                    int mdt_index, char *logname)
 {
+       char tmp[LNET_NIDSTR_SIZE], *nidstr;
        struct llog_handle      *llh = NULL;
        char    *nodeuuid = NULL;
        char    *ospname = NULL;
@@ -2711,7 +2757,6 @@ static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
        char    *mdtname = NULL;
        char    *lovname = NULL;
        char    index_str[16];
-       char    nidstr[LNET_NIDSTR_SIZE];
        int     i, rc;
 
        ENTRY;
@@ -2727,7 +2772,13 @@ static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
        if (rc)
                RETURN(rc);
 
-       libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr));
+       if (!target_supports_large_nid(mti)) {
+               libcfs_nid2str_r(mti->mti_nids[0], tmp, sizeof(tmp));
+               nidstr = tmp;
+       } else {
+               nidstr = mti->mti_nidlist[0];
+       }
+
        rc = name_create(&nodeuuid, nidstr, "");
        if (rc)
                GOTO(out_destory, rc);
@@ -2770,9 +2821,15 @@ static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
        for (i = 0; i < mti->mti_nid_count; i++) {
                struct lnet_nid nid;
 
-               lnet_nid4_to_nid(mti->mti_nids[i], &nid);
-               CDEBUG(D_MGS, "add nid %s for mdt\n",
-                      libcfs_nidstr_r(&nid, nidstr, sizeof(nidstr)));
+               if (target_supports_large_nid(mti)) {
+                       rc = libcfs_strnid(&nid, mti->mti_nidlist[i]);
+                       if (rc < 0)
+                               GOTO(out_end, rc);
+               } else {
+                       lnet_nid4_to_nid(mti->mti_nids[i], &nid);
+               }
+
+               CDEBUG(D_MGS, "add nid %s for mdt\n", libcfs_nidstr(&nid));
                rc = record_add_uuid(env, llh, &nid, nodeuuid);
                if (rc)
                        GOTO(out_end, rc);
@@ -2905,6 +2962,7 @@ static int mgs_write_log_mdt(const struct lu_env *env,
        rc = mgs_write_log_mdt0(env, mgs, fsdb, mti);
        if (rc)
                RETURN(rc);
+
        /* Append the mdt info to the client log */
        rc = name_create(&cliname, mti->mti_fsname, "-client");
        if (rc)
@@ -3007,6 +3065,7 @@ static int mgs_write_log_osc_to_lov(const struct lu_env *env,
                                    char *logname, char *suffix, char *lovname,
                                    enum lustre_sec_part sec_part, int flags)
 {
+       char tmp[LNET_NIDSTR_SIZE], *nidstr;
        struct llog_handle *llh = NULL;
        char *nodeuuid = NULL;
        char *oscname = NULL;
@@ -3014,10 +3073,9 @@ static int mgs_write_log_osc_to_lov(const struct lu_env *env,
        char *lovuuid = NULL;
        char *svname = NULL;
        char index[6];
-       char nidstr[LNET_NIDSTR_SIZE];
        int i, rc;
-       ENTRY;
 
+       ENTRY;
        CDEBUG(D_INFO, "adding osc for %s to log %s\n",
               mti->mti_svname, logname);
 
@@ -3026,8 +3084,14 @@ static int mgs_write_log_osc_to_lov(const struct lu_env *env,
                RETURN(-EINVAL);
        }
 
-       libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr));
-       rc = name_create(&nodeuuid, nidstr, "");
+       if (!target_supports_large_nid(mti)) {
+               libcfs_nid2str_r(mti->mti_nids[0], tmp, sizeof(tmp));
+               nidstr = tmp;
+       } else {
+               nidstr = mti->mti_nidlist[0];
+       }
+
+       rc = name_create(&nodeuuid, nidstr, "");
        if (rc)
                RETURN(rc);
        rc = name_create(&svname, mti->mti_svname, "-osc");
@@ -3080,13 +3144,20 @@ static int mgs_write_log_osc_to_lov(const struct lu_env *env,
        for (i = 0; i < mti->mti_nid_count; i++) {
                struct lnet_nid nid;
 
-               lnet_nid4_to_nid(mti->mti_nids[i], &nid);
-               CDEBUG(D_MGS, "add nid %s\n",
-                      libcfs_nidstr_r(&nid, nidstr, sizeof(nidstr)));
+               if (target_supports_large_nid(mti)) {
+                       rc = libcfs_strnid(&nid, mti->mti_nidlist[i]);
+                       if (rc < 0)
+                               GOTO(out_end, rc);
+               } else {
+                       lnet_nid4_to_nid(mti->mti_nids[i], &nid);
+               }
+
+               CDEBUG(D_MGS, "add nid %s\n", libcfs_nidstr(&nid));
                rc = record_add_uuid(env, llh, &nid, nodeuuid);
                if (rc)
                        GOTO(out_end, rc);
        }
+
        rc = record_attach(env, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
        if (rc)
                GOTO(out_end, rc);
index d9fdd36..969e06a 100644 (file)
@@ -111,13 +111,15 @@ static int mgs_nidtbl_read(struct obd_export *exp, struct mgs_nidtbl *tbl,
         */
        list_for_each_entry(tgt, &tbl->mn_targets, mnt_list) {
                int entry_len = sizeof(*entry);
+               int i;
 
                if (tgt->mnt_version < version)
                        continue;
 
                /* write target recover information */
                mti  = &tgt->mnt_mti;
-               LASSERT(mti->mti_nid_count < MTI_NIDS_MAX);
+               if (!target_supports_large_nid(mti))
+                       LASSERT(mti->mti_nid_count < MTI_NIDS_MAX);
                entry_len += mti->mti_nid_count * sizeof(lnet_nid_t);
 
                if (entry_len > unit_size) {
@@ -177,9 +179,36 @@ static int mgs_nidtbl_read(struct obd_export *exp, struct mgs_nidtbl *tbl,
                entry->mne_type      = tgt->mnt_type;
                entry->mne_nid_type  = 0;
                entry->mne_nid_size  = sizeof(lnet_nid_t);
-               entry->mne_nid_count = mti->mti_nid_count;
-               memcpy(entry->u.nids, mti->mti_nids,
-                      mti->mti_nid_count * sizeof(lnet_nid_t));
+               /* We have been sent the newer larger NID format but the
+                * current nidtbl doesn't support it. So filter the NIDs
+                * sent to reject any real larger size NIDS.
+                */
+               if (target_supports_large_nid(mti)) {
+                       entry->mne_nid_count = 0;
+
+                       for (i = 0; i < mti->mti_nid_count; i++) {
+                               struct lnet_nid nid;
+                               int err;
+
+                               err = libcfs_strnid(&nid, mti->mti_nidlist[i]);
+                               if (err < 0)
+                                       GOTO(out, rc = err);
+
+                               /* if the large NID format represents a small
+                                * address space we can still pass it back to
+                                * the older clients.
+                                */
+                               if (nid_is_nid4(&nid)) {
+                                       entry->u.nids[entry->mne_nid_count] =
+                                               lnet_nid_to_nid4(&nid);
+                                       entry->mne_nid_count++;
+                               }
+                       }
+               } else {
+                       entry->mne_nid_count = mti->mti_nid_count;
+                       memcpy(entry->u.nids, mti->mti_nids,
+                              mti->mti_nid_count * sizeof(lnet_nid_t));
+               }
 
                version = tgt->mnt_version;
                rc     += entry_len;
@@ -312,10 +341,10 @@ static int mgs_nidtbl_write(const struct lu_env *env, struct fs_db *fsdb,
        struct mgs_nidtbl_target *tgt;
        bool found = false;
        int type = mti->mti_flags & LDD_F_SV_TYPE_MASK;
+       size_t mti_len = 0;
        int rc = 0;
 
        ENTRY;
-
        type &= ~LDD_F_SV_TYPE_MGS;
        LASSERT(type != 0);
 
@@ -330,8 +359,14 @@ static int mgs_nidtbl_write(const struct lu_env *env, struct fs_db *fsdb,
                        break;
                }
        }
+
+       if (target_supports_large_nid(mti))
+               mti_len = mti->mti_nid_count * LNET_NIDSTR_SIZE;
        if (!found) {
-               OBD_ALLOC_PTR(tgt);
+               size_t len = offsetof(struct mgs_nidtbl_target,
+                                     mnt_mti.mti_nidlist);
+
+               OBD_ALLOC(tgt, len + mti_len);
                if (!tgt)
                        GOTO(out, rc = -ENOMEM);
 
@@ -344,7 +379,9 @@ static int mgs_nidtbl_write(const struct lu_env *env, struct fs_db *fsdb,
        }
 
        tgt->mnt_version = ++tbl->mn_version;
-       tgt->mnt_mti     = *mti;
+       tgt->mnt_mti = *mti;
+       if (target_supports_large_nid(mti))
+               memcpy(tgt->mnt_mti.mti_nidlist, mti->mti_nidlist, mti_len);
 
        list_move_tail(&tgt->mnt_list, &tbl->mn_targets);
 
@@ -362,6 +399,8 @@ out:
 static void mgs_nidtbl_fini_fs(struct fs_db *fsdb)
 {
        struct mgs_nidtbl *tbl = &fsdb->fsdb_nidtbl;
+       size_t len = offsetof(struct mgs_nidtbl_target,
+                             mnt_mti.mti_nidlist);
        LIST_HEAD(head);
 
        mutex_lock(&tbl->mn_lock);
@@ -371,11 +410,14 @@ static void mgs_nidtbl_fini_fs(struct fs_db *fsdb)
 
        while (!list_empty(&head)) {
                struct mgs_nidtbl_target *tgt;
+               size_t mti_len = 0;
 
                tgt = list_first_entry(&head, struct mgs_nidtbl_target,
                                       mnt_list);
+               if (target_supports_large_nid(&tgt->mnt_mti))
+                       mti_len += tgt->mnt_mti.mti_nid_count * LNET_NIDSTR_SIZE;
                list_del(&tgt->mnt_list);
-               OBD_FREE_PTR(tgt);
+               OBD_FREE(tgt, len + mti_len);
        }
 }
 
index a43a659..a586148 100644 (file)
@@ -256,7 +256,7 @@ int lustre_start_mgc(struct super_block *sb)
                } else if (IS_MGS(lsi)) {
                        struct lnet_processid id;
 
-                       while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
+                       while ((rc = LNetGetId(i++, &id, true)) != -ENOENT) {
                                if (nid_is_lo0(&id.nid))
                                        continue;
                                nid = id.nid;
@@ -367,7 +367,7 @@ int lustre_start_mgc(struct super_block *sb)
                        /* Use local NIDs (including LO) */
                        struct lnet_processid id;
 
-                       while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
+                       while ((rc = LNetGetId(i++, &id, true)) != -ENOENT) {
                                rc = do_lcfg_nid(mgcname, &id.nid,
                                                 LCFG_ADD_UUID,
                                                 niduuid);
@@ -483,7 +483,8 @@ int lustre_start_mgc(struct super_block *sb)
                                  OBD_CONNECT_LVB_TYPE |
                                  OBD_CONNECT_BULK_MBITS | OBD_CONNECT_BARRIER |
                                  OBD_CONNECT_FLAGS2;
-       data->ocd_connect_flags2 = OBD_CONNECT2_REP_MBITS;
+       data->ocd_connect_flags2 = OBD_CONNECT2_REP_MBITS |
+                                  OBD_CONNECT2_LARGE_NID;
 
        if (lmd_is_client(lsi->lsi_lmd) &&
            test_bit(LMD_FLG_NOIR, lsi->lsi_lmd->lmd_flags))
index 5114528..97ea82d 100644 (file)
@@ -270,7 +270,7 @@ struct lu_nodemap *nodemap_classify_nid(lnet_nid_t nid)
                int i = 0;
 
                do {
-                       rc = LNetGetId(i++, &id);
+                       rc = LNetGetId(i++, &id, false);
                        if (rc < 0)
                                RETURN(ERR_PTR(-EINVAL));
                } while (nid_is_lo0(&id.nid));
index 717a511..889109a 100644 (file)
@@ -44,6 +44,7 @@
 #include <libcfs/libcfs.h>
 
 #include <llog_swab.h>
+#include <lustre_disk.h>
 #include <lustre_net.h>
 #include <lustre_swab.h>
 #include <obd_cksum.h>
@@ -2076,7 +2077,12 @@ void lustre_swab_mgs_target_info(struct mgs_target_info *mti)
        __swab32s(&mti->mti_flags);
        __swab32s(&mti->mti_instance);
        __swab32s(&mti->mti_nid_count);
-       BUILD_BUG_ON(sizeof(lnet_nid_t) != sizeof(__u64));
+       BUILD_BUG_ON(sizeof(lnet_nid_t) != sizeof(u64));
+
+       /* For NID string we never need to swab */
+       if (target_supports_large_nid(mti))
+               return;
+
        for (i = 0; i < MTI_NIDS_MAX; i++)
                __swab64s(&mti->mti_nids[i]);
 }
index 951be38..7676db9 100644 (file)
@@ -4638,6 +4638,10 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct mgs_target_info, mti_params));
        LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_params) == 4096, "found %lld\n",
                 (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_params));
+       LASSERTF((int)offsetof(struct mgs_target_info, mti_nidlist[0]) == 4544, "found %lld\n",
+                (long long)(int)offsetof(struct mgs_target_info, mti_nidlist[0]));
+       LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_nidlist[0]) == 64, "found %lld\n",
+                (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_nidlist[0]));
 
        /* Checks for struct mgs_nidtbl_entry */
        LASSERTF((int)sizeof(struct mgs_nidtbl_entry) == 24, "found %lld\n",
index 1358b35..2a312f2 100644 (file)
  *
  * Author: Nathan Rutman <nathan@clusterfs.com>
  */
-
-
 #define DEBUG_SUBSYSTEM S_CLASS
 #define D_MOUNT (D_SUPER | D_CONFIG /* | D_WARNING */)
-#define PRINT_CMD CDEBUG
-#define PRINT_MASK (D_SUPER | D_CONFIG)
 
 #include <linux/types.h>
+#include <linux/generic-radix-tree.h>
 #ifdef HAVE_LINUX_SELINUX_IS_ENABLED
 #include <linux/selinux.h>
 #endif
@@ -1159,33 +1156,87 @@ static int server_stop_servers(int lsiflags)
 
+/* Dump a target registration record to the debug log at D_MOUNT level.
+ *
+ * \param title  label printed on the first line of the dump
+ * \param mti    record to print; every flag bit set in mti_flags is named
+ *               on a line of its own
+ * \retval 0     always
+ */
 int server_mti_print(const char *title, struct mgs_target_info *mti)
 {
-       PRINT_CMD(PRINT_MASK, "mti %s\n", title);
-       PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
-       PRINT_CMD(PRINT_MASK, "fs:     %s\n", mti->mti_fsname);
-       PRINT_CMD(PRINT_MASK, "uuid:   %s\n", mti->mti_uuid);
-       PRINT_CMD(PRINT_MASK, "ver: %d  flags: %#x\n",
-                 mti->mti_config_ver, mti->mti_flags);
+       CDEBUG(D_MOUNT, "mti - %s\n", title);
+       CDEBUG(D_MOUNT, "server: %s\n", mti->mti_svname);
+       CDEBUG(D_MOUNT, "fs:     %s\n", mti->mti_fsname);
+       CDEBUG(D_MOUNT, "uuid:   %s\n", mti->mti_uuid);
+       CDEBUG(D_MOUNT, "ver:    %d\n", mti->mti_config_ver);
+       CDEBUG(D_MOUNT, "flags:\n");
+       if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
+               CDEBUG(D_MOUNT, "        LDD_F_SV_TYPE_MDT\n");
+       if (mti->mti_flags & LDD_F_SV_TYPE_OST)
+               CDEBUG(D_MOUNT, "        LDD_F_SV_TYPE_OST\n");
+       if (mti->mti_flags & LDD_F_SV_TYPE_MGS)
+               CDEBUG(D_MOUNT, "        LDD_F_SV_TYPE_MGS\n");
+       if (mti->mti_flags & LDD_F_SV_ALL)
+               CDEBUG(D_MOUNT, "        LDD_F_SV_ALL\n");
+       if (mti->mti_flags & LDD_F_NEED_INDEX)
+               CDEBUG(D_MOUNT, "        LDD_F_NEED_INDEX\n");
+       if (mti->mti_flags & LDD_F_VIRGIN)
+               CDEBUG(D_MOUNT, "        LDD_F_VIRGIN\n");
+       if (mti->mti_flags & LDD_F_UPDATE)
+               CDEBUG(D_MOUNT, "        LDD_F_UPDATE\n");
+       if (mti->mti_flags & LDD_F_REWRITE_LDD)
+               CDEBUG(D_MOUNT, "        LDD_F_REWRITE_LDD\n");
+       if (mti->mti_flags & LDD_F_WRITECONF)
+               CDEBUG(D_MOUNT, "        LDD_F_WRITECONF\n");
+       if (mti->mti_flags & LDD_F_PARAM)
+               CDEBUG(D_MOUNT, "        LDD_F_PARAM\n");
+       if (mti->mti_flags & LDD_F_NO_PRIMNODE)
+               CDEBUG(D_MOUNT, "        LDD_F_NO_PRIMNODE\n");
+       if (mti->mti_flags & LDD_F_IR_CAPABLE)
+               CDEBUG(D_MOUNT, "        LDD_F_IR_CAPABLE\n");
+       if (mti->mti_flags & LDD_F_ERROR)
+               CDEBUG(D_MOUNT, "        LDD_F_ERROR\n");
+       if (mti->mti_flags & LDD_F_PARAM2)
+               CDEBUG(D_MOUNT, "        LDD_F_PARAM2\n");
+       if (mti->mti_flags & LDD_F_NO_LOCAL_LOGS)
+               CDEBUG(D_MOUNT, "        LDD_F_NO_LOCAL_LOGS\n");
+
+       /* Upper 16 bits for target registering */
+       if (target_supports_large_nid(mti))
+               CDEBUG(D_MOUNT, "        LDD_F_LARGE_NID\n");
+       if (mti->mti_flags & LDD_F_OPC_REG)
+               CDEBUG(D_MOUNT, "        LDD_F_OPC_REG\n");
+       if (mti->mti_flags & LDD_F_OPC_UNREG)
+               CDEBUG(D_MOUNT, "        LDD_F_OPC_UNREG\n");
+       if (mti->mti_flags & LDD_F_OPC_READY)
+               CDEBUG(D_MOUNT, "        LDD_F_OPC_READY\n");
+
        return 0;
 }
+EXPORT_SYMBOL(server_mti_print);
 
 /* Generate data for registration */
-static int server_lsi2mti(struct lustre_sb_info *lsi,
-                         struct mgs_target_info *mti)
+static struct mgs_target_info *server_lsi2mti(struct lustre_sb_info *lsi)
 {
-       struct lnet_processid id;
+       size_t len = offsetof(struct mgs_target_info, mti_nidlist);
+       GENRADIX(struct lnet_processid) plist;
+       struct lnet_processid id, *tmp;
+       struct mgs_target_info *mti;
+       bool large_nid = false;
+       int nid_count = 0;
        int rc, i = 0;
        int cplen = 0;
 
        ENTRY;
        if (!IS_SERVER(lsi))
-               RETURN(-EINVAL);
+               RETURN(ERR_PTR(-EINVAL));
 
-       if (strlcpy(mti->mti_svname, lsi->lsi_svname, sizeof(mti->mti_svname))
-           >= sizeof(mti->mti_svname))
-               RETURN(-E2BIG);
+       if (exp_connect_flags2(lsi->lsi_mgc->u.cli.cl_mgc_mgsexp) &
+           OBD_CONNECT2_LARGE_NID)
+               large_nid = true;
 
-       mti->mti_nid_count = 0;
-       while (LNetGetId(i++, &id) != -ENOENT) {
+       genradix_init(&plist);
+
+       while (LNetGetId(i++, &id, large_nid) != -ENOENT) {
                if (nid_is_lo0(&id.nid))
                        continue;
 
@@ -1204,7 +1248,7 @@ static int server_lsi2mti(struct lustre_sb_info *lsi,
                                       " on this node. 'network' option used in"
                                       " mkfs.lustre cannot be taken into"
                                       " account.\n");
-                               RETURN(-EINVAL);
+                               GOTO(free_list, mti = ERR_PTR(-EINVAL));
                        }
                }
 
@@ -1213,42 +1257,82 @@ static int server_lsi2mti(struct lustre_sb_info *lsi,
                                     PARAM_NETWORK, LNET_NID_NET(&id.nid)))
                        continue;
 
-               mti->mti_nids[mti->mti_nid_count] = lnet_nid_to_nid4(&id.nid);
-               mti->mti_nid_count++;
-               if (mti->mti_nid_count >= MTI_NIDS_MAX) {
-                       CWARN("Only using first %d nids for %s\n",
-                             mti->mti_nid_count, mti->mti_svname);
-                       break;
-               }
+               /* Without large NID support the NIDs are sent in the fixed
+                * mti_nids[] array, which only has room for MTI_NIDS_MAX
+                * entries, so stop collecting before it would overflow.
+                */
+               if (!large_nid && nid_count >= MTI_NIDS_MAX) {
+                       CWARN("Only using first %d nids for %s\n",
+                             nid_count, lsi->lsi_svname);
+                       break;
+               }
+
+               tmp = genradix_ptr_alloc(&plist, nid_count++, GFP_KERNEL);
+               if (!tmp)
+                       GOTO(free_list, mti = ERR_PTR(-ENOMEM));
+
+               if (large_nid)
+                       len += LNET_NIDSTR_SIZE;
+               *tmp = id;
        }
 
-       if (mti->mti_nid_count == 0) {
+       if (nid_count == 0) {
                CERROR("Failed to get NID for server %s, please check whether the target is specifed with improper --servicenode or --network options.\n",
-                      mti->mti_svname);
-               RETURN(-EINVAL);
+                      lsi->lsi_svname);
+               GOTO(free_list, mti = ERR_PTR(-EINVAL));
        }
 
+       OBD_ALLOC(mti, len);
+       if (!mti)
+               GOTO(free_list, mti = ERR_PTR(-ENOMEM));
+
+       if (strlcpy(mti->mti_svname, lsi->lsi_svname, sizeof(mti->mti_svname))
+           >= sizeof(mti->mti_svname))
+               GOTO(free_mti, rc = -E2BIG);
+
+       mti->mti_nid_count = nid_count;
+       for (i = 0; i < mti->mti_nid_count; i++) {
+               tmp = genradix_ptr(&plist, i);
+
+               if (large_nid)
+                       libcfs_nidstr_r(&tmp->nid, mti->mti_nidlist[i],
+                                       sizeof(mti->mti_nidlist[i]));
+               else
+                       mti->mti_nids[i] = lnet_nid_to_nid4(&tmp->nid);
+       }
        mti->mti_lustre_ver = LUSTRE_VERSION_CODE;
        mti->mti_config_ver = 0;
 
        rc = server_name2fsname(lsi->lsi_svname, mti->mti_fsname, NULL);
-       if (rc != 0)
-               return rc;
+       if (rc < 0)
+               GOTO(free_mti, rc);
 
        rc = server_name2index(lsi->lsi_svname, &mti->mti_stripe_index, NULL);
        if (rc < 0)
-               return rc;
+               GOTO(free_mti, rc);
+
        /* Orion requires index to be set */
        LASSERT(!(rc & LDD_F_NEED_INDEX));
        /* keep only LDD flags */
        mti->mti_flags = lsi->lsi_flags & LDD_F_MASK;
        if (mti->mti_flags & (LDD_F_WRITECONF | LDD_F_VIRGIN))
                mti->mti_flags |= LDD_F_UPDATE;
+       /* use NID strings instead */
+       if (large_nid)
+               mti->mti_flags |= LDD_F_LARGE_NID;
        cplen = strlcpy(mti->mti_params, lsi->lsi_lmd->lmd_params,
                        sizeof(mti->mti_params));
        if (cplen >= sizeof(mti->mti_params))
-               return -E2BIG;
-       return 0;
+               rc = -E2BIG;
+free_mti:
+       if (rc < 0) {
+               OBD_FREE(mti, len);
+               mti = ERR_PTR(rc);
+       }
+free_list:
+       genradix_free(&plist);
+
+       return mti;
 }
 
 /* Register an old or new target with the MGS. If needed MGS will construct
@@ -1258,42 +1332,41 @@ static int server_register_target(struct lustre_sb_info *lsi)
 {
        struct obd_device *mgc = lsi->lsi_mgc;
        struct mgs_target_info *mti = NULL;
+       size_t mti_len = sizeof(*mti);
        bool must_succeed;
        int rc;
        int tried = 0;
 
        ENTRY;
        LASSERT(mgc);
-
-       if (!IS_SERVER(lsi))
-               RETURN(-EINVAL);
-
-       OBD_ALLOC_PTR(mti);
-       if (!mti)
-               RETURN(-ENOMEM);
-
-       rc = server_lsi2mti(lsi, mti);
-       if (rc < 0)
-               GOTO(out, rc);
+       mti = server_lsi2mti(lsi);
+       if (IS_ERR(mti))
+               GOTO(out, rc = PTR_ERR(mti));
 
+       /* mti_nidlist[] is only allocated and filled in when the NIDs are
+        * registered as strings; otherwise the first NID is in mti_nids[].
+        */
        CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
-              mti->mti_svname, mti->mti_fsname,
-              libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
-              mti->mti_flags);
+              mti->mti_svname, mti->mti_fsname,
+              target_supports_large_nid(mti) ? mti->mti_nidlist[0] :
+                                      libcfs_nid2str(mti->mti_nids[0]),
+              mti->mti_stripe_index, mti->mti_flags);
 
        /* we cannot ignore registration failure if MGS logs must be updated. */
        must_succeed = !!(lsi->lsi_flags &
                    (LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_WRITECONF |
                     LDD_F_VIRGIN));
        mti->mti_flags |= LDD_F_OPC_REG;
-
+       if (target_supports_large_nid(mti))
+               mti_len += mti->mti_nid_count * LNET_NIDSTR_SIZE;
+       server_mti_print("server_register_target", mti);
 again:
        /* Register the target */
        /* FIXME use mgc_process_config instead */
        rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
                                sizeof(KEY_REGISTER_TARGET),
                                KEY_REGISTER_TARGET,
-                               sizeof(*mti), mti, NULL);
+                               mti_len, mti, NULL);
        if (rc < 0) {
                if (mti->mti_flags & LDD_F_ERROR) {
                        LCONSOLE_ERROR_MSG(0x160,
@@ -1319,12 +1387,10 @@ again:
                        /* reset the error code for non-fatal error. */
                        rc = 0;
                }
-               GOTO(out, rc);
        }
 
+       OBD_FREE(mti, mti_len);
 out:
-       if (mti)
-               OBD_FREE_PTR(mti);
        RETURN(rc);
 }
 
@@ -1337,40 +1403,35 @@ static int server_notify_target(struct super_block *sb, struct obd_device *obd)
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct obd_device *mgc = lsi->lsi_mgc;
        struct mgs_target_info *mti = NULL;
+       size_t mti_len = sizeof(*mti);
        int rc;
 
        ENTRY;
        LASSERT(mgc);
-
-       if (!(IS_SERVER(lsi)))
-               RETURN(-EINVAL);
-
-       OBD_ALLOC_PTR(mti);
-       if (!mti)
-               RETURN(-ENOMEM);
-       rc = server_lsi2mti(lsi, mti);
-       if (rc < 0)
-               GOTO(out, rc);
+       mti = server_lsi2mti(lsi);
+       if (IS_ERR(mti))
+               GOTO(out, rc = PTR_ERR(mti));
 
        mti->mti_instance = obd2obt(obd)->obt_instance;
        mti->mti_flags |= LDD_F_OPC_READY;
+       if (target_supports_large_nid(mti))
+               mti_len += mti->mti_nid_count * LNET_NIDSTR_SIZE;
+       server_mti_print("server_notify_target", mti);
 
        /* FIXME use mgc_process_config instead */
        rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
                                sizeof(KEY_REGISTER_TARGET),
                                KEY_REGISTER_TARGET,
-                               sizeof(*mti), mti, NULL);
+                               mti_len, mti, NULL);
 
        /* Imperative recovery: if the mgs informs us to use IR? */
        if (!rc && !(mti->mti_flags & LDD_F_ERROR) &&
            (mti->mti_flags & LDD_F_IR_CAPABLE))
                lsi->lsi_flags |= LDD_F_IR_CAPABLE;
 
+       OBD_FREE(mti, mti_len);
 out:
-       if (mti)
-               OBD_FREE_PTR(mti);
        RETURN(rc);
-
 }
 
 /* Start server targets: MDTs and OSTs */
index 5b8adbf..39306a5 100644 (file)
@@ -2162,6 +2162,7 @@ check_mgs_target_info(void)
        CHECK_MEMBER(mgs_target_info, mti_uuid);
        CHECK_MEMBER(mgs_target_info, mti_nids);
        CHECK_MEMBER(mgs_target_info, mti_params);
+       CHECK_MEMBER(mgs_target_info, mti_nidlist[0]);
 }
 
 static void
index 00ec09f..99be9fb 100644 (file)
@@ -4699,6 +4699,10 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct mgs_target_info, mti_params));
        LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_params) == 4096, "found %lld\n",
                 (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_params));
+       LASSERTF((int)offsetof(struct mgs_target_info, mti_nidlist[0]) == 4544, "found %lld\n",
+                (long long)(int)offsetof(struct mgs_target_info, mti_nidlist[0]));
+       LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_nidlist[0]) == 64, "found %lld\n",
+                (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_nidlist[0]));
 
        /* Checks for struct mgs_nidtbl_entry */
        LASSERTF((int)sizeof(struct mgs_nidtbl_entry) == 24, "found %lld\n",