Whamcloud - gitweb
LU-7101 lnet: per NI map-on-demand value 67/16367/18
authorAmir Shehata <amir.shehata@intel.com>
Tue, 15 Mar 2016 19:39:54 +0000 (15:39 -0400)
committerOleg Drokin <oleg.drokin@intel.com>
Thu, 7 Apr 2016 15:36:11 +0000 (15:36 +0000)
Enables support of different map-on-demand values per NI.  This is
required to support OPA coexistence with MLX5 cards.  MLX5 does not
support FMR, which is enabled via map-on-demand.  However, OPA's
performance is greatly enhanced when FMR is enabled.  In order
to enable coexistence of these two types of cards we
need to be able to set a different map-on-demand value for each
NI.

This patch also lays the groundwork for other per-NI tunables to
be added in future patches.

Signed-off-by: Amir Shehata <amir.shehata@intel.com>
Signed-off-by: James Simmons <uja.ornl@yahoo.com>
Change-Id: Ic7617d3d5846e58f83e7c67bb9eb7173700be8d7
Reviewed-on: http://review.whamcloud.com/16367
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: Olaf Weber <olaf@sgi.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
16 files changed:
lnet/include/lnet/lib-dlc.h
lnet/include/lnet/lib-lnet.h
lnet/include/lnet/lib-types.h
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/o2iblnd/o2iblnd.h
lnet/klnds/o2iblnd/o2iblnd_cb.c
lnet/klnds/o2iblnd/o2iblnd_modparams.c
lnet/lnet/api-ni.c
lnet/lnet/config.c
lnet/lnet/module.c
lnet/utils/lnetconfig/Makefile.am
lnet/utils/lnetconfig/liblnd.h [new file with mode: 0644]
lnet/utils/lnetconfig/liblnetconfig.c
lnet/utils/lnetconfig/liblnetconfig.h
lnet/utils/lnetconfig/liblnetconfig_lnd.c [new file with mode: 0644]
lnet/utils/lnetctl.c

index 749daf2..b064c0b 100644 (file)
 #define LNET_MAX_SHOW_NUM_CPT  128
 #define LNET_UNDEFINED_HOPS    ((__u32) -1)
 
+struct lnet_ioctl_config_lnd_cmn_tunables { /* embedded as lt_cmn in struct lnet_ioctl_config_lnd_tunables */
+       __u32 lct_version; /* NOTE(review): presumably a layout version for this struct - confirm */
+       __u32 lct_peer_timeout; /* NOTE(review): presumably the per-NI peer timeout (cf. ni_peertimeout) - confirm */
+       __u32 lct_peer_tx_credits; /* NOTE(review): presumably per-peer tx credits (cf. ni_peertxcredits) - confirm */
+       __u32 lct_peer_rtr_credits; /* NOTE(review): presumably per-peer router credits (cf. ni_peerrtrcredits) - confirm */
+       __u32 lct_max_tx_credits; /* NOTE(review): presumably the NI-wide tx credits (cf. ni_maxtxcredits) - confirm */
+};
+
+struct lnet_ioctl_config_o2iblnd_tunables { /* per-NI o2iblnd settings, reached via ni_lnd_tunables->lt_tun_u.lt_o2ib */
+       __u32 lnd_version; /* NOTE(review): presumably a layout version for this struct - confirm */
+       __u32 lnd_peercredits_hiw; /* returned by IBLND_CREDITS_HIGHWATER() for non-V1 messages */
+       __u32 lnd_map_on_demand; /* 0: FMR pool setup is skipped; otherwise the configured max RDMA fragment count */
+       __u32 lnd_concurrent_sends; /* limit on sends posted per connection, see kiblnd_concurrent_sends() */
+       __u32 lnd_fmr_pool_size; /* FMR pool size; divided by ncpts in kiblnd_fmr_pool_size() */
+       __u32 lnd_fmr_flush_trigger; /* FMR flush trigger; divided by ncpts in kiblnd_fmr_flush_trigger() */
+       __u32 lnd_fmr_cache; /* non-zero enables the FMR pool cache (copied to fps_cache) */
+       __u32 pad; /* NOTE(review): presumably keeps the struct size a multiple of 8 bytes - confirm */
+};
+
+struct lnet_ioctl_config_lnd_tunables { /* type that lnet_ni.ni_lnd_tunables points to */
+       struct lnet_ioctl_config_lnd_cmn_tunables lt_cmn;
+       union { /* LND-specific part; o2iblnd is the only member defined here */
+               struct lnet_ioctl_config_o2iblnd_tunables lt_o2ib;
+       } lt_tun_u;
+};
+
 struct lnet_ioctl_net_config {
        char ni_interfaces[LNET_MAX_INTERFACES][LNET_MAX_STR_LEN];
        __u32 ni_status;
        __u32 ni_cpts[LNET_MAX_SHOW_NUM_CPT];
+       char cfg_bulk[0];
 };
 
 #define LNET_TINY_BUF_IDX      0
@@ -81,7 +108,7 @@ struct lnet_ioctl_config_data {
                        __s32 net_peer_rtr_credits;
                        __s32 net_max_tx_credits;
                        __u32 net_cksum_algo;
-                       __u32 net_pad;
+                       __u32 net_interface_count;
                } cfg_net;
                struct {
                        __u32 buf_enable;
index 3a74c4f..398ca78 100644 (file)
@@ -471,14 +471,6 @@ int lnet_del_route(__u32 net, lnet_nid_t gw_nid);
 void lnet_destroy_routes(void);
 int lnet_get_route(int idx, __u32 *net, __u32 *hops,
                   lnet_nid_t *gateway, __u32 *alive, __u32 *priority);
-int lnet_get_net_config(int idx,
-                       __u32 *cpt_count,
-                       __u64 *nid,
-                       int *peer_timeout,
-                       int *peer_tx_credits,
-                       int *peer_rtr_cr,
-                       int *max_tx_credits,
-                       struct lnet_ioctl_net_config *net_config);
 int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg);
 
 struct libcfs_ioctl_handler {
@@ -506,9 +498,8 @@ int lnet_rtrpools_enable(void);
 void lnet_rtrpools_disable(void);
 void lnet_rtrpools_free(int keep_pools);
 lnet_remotenet_t *lnet_find_net_locked (__u32 net);
-int lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
-                   __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr,
-                   __s32 credits);
+int lnet_dyn_add_ni(lnet_pid_t requested_pid,
+                   struct lnet_ioctl_config_data *conf);
 int lnet_dyn_del_ni(__u32 net);
 int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason);
 
index fca5446..c61e669 100644 (file)
@@ -287,6 +287,8 @@ typedef struct lnet_ni {
        int                     **ni_refs;      /* percpt reference count */
        long                    ni_last_alive;  /* when I was last alive */
        lnet_ni_status_t        *ni_status;     /* my health status */
+       /* per NI LND tunables */
+       struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables;
        /* equivalent interfaces to use */
        char                    *ni_interfaces[LNET_MAX_INTERFACES];
 } lnet_ni_t;
index 100ea1b..e458797 100644 (file)
@@ -337,8 +337,8 @@ kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid)
        peer->ibp_nid = nid;
        peer->ibp_error = 0;
        peer->ibp_last_alive = 0;
-       peer->ibp_max_frags = IBLND_CFG_RDMA_FRAGS;
-       peer->ibp_queue_depth = *kiblnd_tunables.kib_peertxcredits;
+       peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni);
+       peer->ibp_queue_depth = ni->ni_peertxcredits;
        atomic_set(&peer->ibp_refcount, 1);     /* 1 ref for caller */
 
        INIT_LIST_HEAD(&peer->ibp_list);        /* not in the peer table yet */
@@ -1386,16 +1386,22 @@ kiblnd_map_tx_pool(kib_tx_pool_t *tpo)
 }
 
 struct ib_mr *
-kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd,
+kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
                      int negotiated_nfrags)
 {
-       __u16   nfrags = (negotiated_nfrags != -1) ?
-         negotiated_nfrags : *kiblnd_tunables.kib_map_on_demand;
+       kib_net_t     *net   = ni->ni_data;
+       kib_hca_dev_t *hdev  = net->ibn_dev->ibd_hdev;
+       struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+       int     mod;
+       __u16   nfrags;
+
+       tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+       mod = tunables->lnd_map_on_demand;
+       nfrags = (negotiated_nfrags != -1) ? negotiated_nfrags : mod;
 
        LASSERT(hdev->ibh_mrs != NULL);
 
-       if (*kiblnd_tunables.kib_map_on_demand > 0 &&
-           nfrags <= rd->rd_nfrags)
+       if (mod > 0 && nfrags <= rd->rd_nfrags)
                return NULL;
 
        return hdev->ibh_mrs;
@@ -1443,16 +1449,20 @@ kiblnd_destroy_fmr_pool_list(struct list_head *head)
        }
 }
 
-static int kiblnd_fmr_pool_size(int ncpts)
+static int
+kiblnd_fmr_pool_size(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
+                    int ncpts) /* returns the FMR pool size to use for one CPT */
 {
-       int size = *kiblnd_tunables.kib_fmr_pool_size / ncpts;
+       int size = tunables->lnd_fmr_pool_size / ncpts; /* this NI's setting split across ncpts; result is floored at IBLND_FMR_POOL */
 
        return max(IBLND_FMR_POOL, size);
 }
 
-static int kiblnd_fmr_flush_trigger(int ncpts)
+static int
+kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
+                        int ncpts) /* returns the FMR flush trigger to use for one CPT */
 {
-       int size = *kiblnd_tunables.kib_fmr_flush_trigger / ncpts;
+       int size = tunables->lnd_fmr_flush_trigger / ncpts; /* this NI's setting split across ncpts; result is floored at IBLND_FMR_POOL_FLUSH */
 
        return max(IBLND_FMR_POOL_FLUSH, size);
 }
@@ -1468,7 +1478,7 @@ static int kiblnd_alloc_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo)
                .dirty_watermark   = fps->fps_flush_trigger,
                .flush_function    = NULL,
                .flush_arg         = NULL,
-               .cache             = !!*kiblnd_tunables.kib_fmr_cache};
+               .cache             = !!fps->fps_cache };
        int rc = 0;
 
        fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd,
@@ -1644,8 +1654,9 @@ kiblnd_fini_fmr_poolset(kib_fmr_poolset_t *fps)
 }
 
 static int
-kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, kib_net_t *net,
-                       int pool_size, int flush_trigger)
+kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, int ncpts,
+                       kib_net_t *net,
+                       struct lnet_ioctl_config_o2iblnd_tunables *tunables)
 {
        kib_fmr_pool_t *fpo;
        int             rc;
@@ -1654,8 +1665,11 @@ kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, kib_net_t *net,
 
        fps->fps_net = net;
        fps->fps_cpt = cpt;
-       fps->fps_pool_size = pool_size;
-       fps->fps_flush_trigger = flush_trigger;
+
+       fps->fps_pool_size = kiblnd_fmr_pool_size(tunables, ncpts);
+       fps->fps_flush_trigger = kiblnd_fmr_flush_trigger(tunables, ncpts);
+       fps->fps_cache = tunables->lnd_fmr_cache;
+
        spin_lock_init(&fps->fps_lock);
        INIT_LIST_HEAD(&fps->fps_pool_list);
        INIT_LIST_HEAD(&fps->fps_failed_pool_list);
@@ -2271,15 +2285,18 @@ kiblnd_net_fini_pools(kib_net_t *net)
 }
 
 static int
-kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
+kiblnd_net_init_pools(kib_net_t *net, lnet_ni_t *ni, __u32 *cpts, int ncpts)
 {
+       struct lnet_ioctl_config_o2iblnd_tunables *tunables;
        unsigned long   flags;
        int             cpt;
        int             rc;
        int             i;
 
+       tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+
        read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-       if (*kiblnd_tunables.kib_map_on_demand == 0) {
+       if (tunables->lnd_map_on_demand == 0) {
                read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
                                           flags);
                goto create_tx_pool;
@@ -2287,10 +2304,9 @@ kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
 
        read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 
-       if (*kiblnd_tunables.kib_fmr_pool_size <
-           *kiblnd_tunables.kib_ntx / 4) {
+       if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) {
                CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n",
-                      *kiblnd_tunables.kib_fmr_pool_size,
+                      tunables->lnd_fmr_pool_size,
                       *kiblnd_tunables.kib_ntx / 4);
                rc = -EINVAL;
                goto failed;
@@ -2313,9 +2329,8 @@ kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
 
        for (i = 0; i < ncpts; i++) {
                cpt = (cpts == NULL) ? i : cpts[i];
-               rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, net,
-                                            kiblnd_fmr_pool_size(ncpts),
-                                            kiblnd_fmr_flush_trigger(ncpts));
+               rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, ncpts,
+                                            net, tunables);
                if (rc != 0) {
                        CERROR("Can't initialize FMR pool for CPT %d: %d\n",
                               cpt, rc);
@@ -3070,10 +3085,7 @@ kiblnd_startup (lnet_ni_t *ni)
        do_gettimeofday(&tv);
        net->ibn_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
 
-        ni->ni_peertimeout    = *kiblnd_tunables.kib_peertimeout;
-        ni->ni_maxtxcredits   = *kiblnd_tunables.kib_credits;
-        ni->ni_peertxcredits  = *kiblnd_tunables.kib_peertxcredits;
-        ni->ni_peerrtrcredits = *kiblnd_tunables.kib_peerrtrcredits;
+       kiblnd_tunables_setup(ni);
 
         if (ni->ni_interfaces[0] != NULL) {
                 /* Use the IPoIB interface specified in 'networks=' */
@@ -3112,7 +3124,7 @@ kiblnd_startup (lnet_ni_t *ni)
        if (rc != 0)
                goto failed;
 
-       rc = kiblnd_net_init_pools(net, ni->ni_cpts, ni->ni_ncpts);
+       rc = kiblnd_net_init_pools(net, ni, ni->ni_cpts, ni->ni_ncpts);
         if (rc != 0) {
                 CERROR("Failed to initialize NI pools: %d\n", rc);
                 goto failed;
index 162742b..56195b3 100644 (file)
@@ -99,21 +99,10 @@ typedef struct
        int              *kib_timeout;          /* comms timeout (seconds) */
        int              *kib_keepalive;        /* keepalive timeout (seconds) */
        int              *kib_ntx;              /* # tx descs */
-       int              *kib_credits;          /* # concurrent sends */
-       int              *kib_peertxcredits;    /* # concurrent sends to 1 peer */
-       int              *kib_peerrtrcredits;   /* # per-peer router buffer credits */
-       int              *kib_peercredits_hiw;  /* # when eagerly to return credits */
-       int              *kib_peertimeout;      /* seconds to consider peer dead */
        char            **kib_default_ipif;     /* default IPoIB interface */
        int              *kib_retry_count;
        int              *kib_rnr_retry_count;
-       int              *kib_concurrent_sends; /* send work queue sizing */
        int              *kib_ib_mtu;           /* IB MTU */
-       int              *kib_map_on_demand;    /* map-on-demand if RD has more fragments
-                                                * than this value, 0 disable map-on-demand */
-       int              *kib_fmr_pool_size;    /* # FMRs in pool */
-       int              *kib_fmr_flush_trigger; /* When to trigger FMR flush */
-       int              *kib_fmr_cache;        /* enable FMR pool cache? */
 #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
        struct ctl_table_header *kib_sysctl;  /* sysctl interface */
 #endif
@@ -131,12 +120,10 @@ extern kib_tunables_t  kiblnd_tunables;
 #define IBLND_CREDITS_DEFAULT        8          /* default # of peer credits */
 #define IBLND_CREDITS_MAX          ((typeof(((kib_msg_t*) 0)->ibm_credits)) - 1)  /* Max # of peer credits */
 
-#define IBLND_MSG_QUEUE_SIZE(v)    ((v) == IBLND_MSG_VERSION_1 ? \
-                                     IBLND_MSG_QUEUE_SIZE_V1 :   \
-                                     *kiblnd_tunables.kib_peertxcredits) /* # messages/RDMAs in-flight */
-#define IBLND_CREDITS_HIGHWATER(v) ((v) == IBLND_MSG_VERSION_1 ? \
-                                     IBLND_CREDIT_HIGHWATER_V1 : \
-                                     *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */
+/* when to eagerly return credits; t: o2iblnd tunables ptr, v: msg version */
+#define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? \
+                                       IBLND_CREDIT_HIGHWATER_V1 : \
+                                       (t)->lnd_peercredits_hiw)
 
 #ifdef HAVE_RDMA_CREATE_ID_4ARG
 #define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps, qpt)
@@ -144,32 +131,12 @@ extern kib_tunables_t  kiblnd_tunables;
 #define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps)
 #endif
 
-static inline int
-kiblnd_concurrent_sends_v1(void)
-{
-        if (*kiblnd_tunables.kib_concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
-                return IBLND_MSG_QUEUE_SIZE_V1 * 2;
-
-        if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
-                return IBLND_MSG_QUEUE_SIZE_V1 / 2;
-
-        return *kiblnd_tunables.kib_concurrent_sends;
-}
-
-#define IBLND_CONCURRENT_SENDS(v)  ((v) == IBLND_MSG_VERSION_1 ? \
-                                     kiblnd_concurrent_sends_v1() : \
-                                     *kiblnd_tunables.kib_concurrent_sends)
 /* 2 OOB shall suffice for 1 keepalive and 1 returning credits */
 #define IBLND_OOB_CAPABLE(v)       ((v) != IBLND_MSG_VERSION_1)
 #define IBLND_OOB_MSGS(v)           (IBLND_OOB_CAPABLE(v) ? 2 : 0)
 
 #define IBLND_MSG_SIZE              (4<<10)                 /* max size of queued messages (inc hdr) */
 #define IBLND_MAX_RDMA_FRAGS         LNET_MAX_IOV           /* max # of fragments supported */
-#define IBLND_CFG_RDMA_FRAGS       (*kiblnd_tunables.kib_map_on_demand != 0 ? \
-                                    *kiblnd_tunables.kib_map_on_demand :      \
-                                     IBLND_MAX_RDMA_FRAGS)  /* max # of fragments configured by user */
-#define IBLND_RDMA_FRAGS(v)        ((v) == IBLND_MSG_VERSION_1 ? \
-                                     IBLND_MAX_RDMA_FRAGS : IBLND_CFG_RDMA_FRAGS)
 
 /************************/
 /* derived constants... */
@@ -189,7 +156,8 @@ kiblnd_concurrent_sends_v1(void)
 /* WRs and CQEs (per connection) */
 #define IBLND_RECV_WRS(c)            IBLND_RX_MSGS(c)
 #define IBLND_SEND_WRS(c)      \
-       ((c->ibc_max_frags + 1) * IBLND_CONCURRENT_SENDS(c->ibc_version))
+       ((c->ibc_max_frags + 1) * kiblnd_concurrent_sends(c->ibc_version, \
+                                                         c->ibc_peer->ibp_ni))
 #define IBLND_CQ_ENTRIES(c)         (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
 
 struct kib_hca_dev;
@@ -331,6 +299,7 @@ typedef struct
        int                     fps_cpt;                /* CPT id */
        int                     fps_pool_size;
        int                     fps_flush_trigger;
+       int                     fps_cache;
        /* is allocating new pool */
        int                     fps_increasing;
        /* time stamp for retry if failed to allocate */
@@ -792,6 +761,48 @@ extern kib_data_t      kiblnd_data;
 
 extern void kiblnd_hdev_destroy(kib_hca_dev_t *hdev);
 
+int kiblnd_msg_queue_size(int version, struct lnet_ni *ni);
+
+/* max # of RDMA fragments configured by user for this NI */
+static inline int
+kiblnd_cfg_rdma_frags(struct lnet_ni *ni)
+{
+       struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+       int mod;
+
+       tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+       mod = tunables->lnd_map_on_demand; /* per-NI map-on-demand value */
+       return mod != 0 ? mod : IBLND_MAX_RDMA_FRAGS; /* 0 falls back to IBLND_MAX_RDMA_FRAGS */
+}
+
+static inline int
+kiblnd_rdma_frags(int version, struct lnet_ni *ni) /* max RDMA fragments for a message version on this NI */
+{
+       return version == IBLND_MSG_VERSION_1 ?
+         IBLND_MAX_RDMA_FRAGS : /* V1 does not consult the per-NI setting */
+         kiblnd_cfg_rdma_frags(ni);
+}
+
+static inline int
+kiblnd_concurrent_sends(int version, struct lnet_ni *ni) /* limit on sends posted per connection for this NI */
+{
+       struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+       int concurrent_sends;
+
+       tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+       concurrent_sends = tunables->lnd_concurrent_sends;
+
+       if (version == IBLND_MSG_VERSION_1) { /* V1: clamp to [IBLND_MSG_QUEUE_SIZE_V1 / 2, IBLND_MSG_QUEUE_SIZE_V1 * 2] */
+               if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
+                       return IBLND_MSG_QUEUE_SIZE_V1 * 2;
+
+               if (concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
+                       return IBLND_MSG_QUEUE_SIZE_V1 / 2;
+       }
+
+       return concurrent_sends; /* in-range V1 and every other version: the per-NI value as is */
+}
+
 static inline void
 kiblnd_hdev_addref_locked(kib_hca_dev_t *hdev)
 {
@@ -914,10 +925,14 @@ kiblnd_send_keepalive(kib_conn_t *conn)
 static inline int
 kiblnd_need_noop(kib_conn_t *conn)
 {
-        LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+       lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
+       struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+
+       LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+       tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
 
         if (conn->ibc_outstanding_credits <
-            IBLND_CREDITS_HIGHWATER(conn->ibc_version) &&
+           IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
             !kiblnd_send_keepalive(conn))
                 return 0; /* No need to send NOOP */
 
@@ -1125,8 +1140,7 @@ static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
 #define KIBLND_CONN_PARAM(e)            ((e)->param.conn.private_data)
 #define KIBLND_CONN_PARAM_LEN(e)        ((e)->param.conn.private_data_len)
 
-struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev,
-                                   kib_rdma_desc_t *rd,
+struct ib_mr *kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
                                    int negotiated_nfrags);
 void kiblnd_map_rx_descs(kib_conn_t *conn);
 void kiblnd_unmap_rx_descs(kib_conn_t *conn);
@@ -1137,6 +1151,7 @@ int  kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
                         __u32 nob, __u64 iov, bool is_rx, kib_fmr_t *fmr);
 void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status);
 
+int  kiblnd_tunables_setup(struct lnet_ni *ni);
 int  kiblnd_tunables_init(void);
 void kiblnd_tunables_fini(void);
 
index 5ba9d24..6160ea5 100644 (file)
@@ -626,8 +626,8 @@ kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx)
 static int
 kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, int nfrags)
 {
-       kib_hca_dev_t *hdev  = tx->tx_pool->tpo_hdev;
        kib_net_t     *net   = ni->ni_data;
+       kib_hca_dev_t *hdev  = net->ibn_dev->ibd_hdev;
        struct ib_mr  *mr    = NULL;
        __u32 nob;
        int i;
@@ -648,7 +648,7 @@ kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, int nfrags)
                 nob += rd->rd_frags[i].rf_nob;
         }
 
-       mr = kiblnd_find_rd_dma_mr(hdev, rd,
+       mr = kiblnd_find_rd_dma_mr(ni, rd,
                                   (tx->tx_conn != NULL) ?
                                   tx->tx_conn->ibc_max_frags : -1);
        if (mr != NULL) {
@@ -763,6 +763,7 @@ __must_hold(&conn->ibc_lock)
 {
         kib_msg_t         *msg = tx->tx_msg;
         kib_peer_t        *peer = conn->ibc_peer;
+       struct lnet_ni    *ni = peer->ibp_ni;
         int                ver = conn->ibc_version;
         int                rc;
         int                done;
@@ -778,7 +779,8 @@ __must_hold(&conn->ibc_lock)
        LASSERT(conn->ibc_credits >= 0);
        LASSERT(conn->ibc_credits <= conn->ibc_queue_depth);
 
-        if (conn->ibc_nsends_posted == IBLND_CONCURRENT_SENDS(ver)) {
+       if (conn->ibc_nsends_posted ==
+           kiblnd_concurrent_sends(ver, ni)) {
                 /* tx completions outstanding... */
                 CDEBUG(D_NET, "%s: posted enough\n",
                        libcfs_nid2str(peer->ibp_nid));
@@ -923,7 +925,8 @@ kiblnd_check_sends (kib_conn_t *conn)
 
        spin_lock(&conn->ibc_lock);
 
-        LASSERT (conn->ibc_nsends_posted <= IBLND_CONCURRENT_SENDS(ver));
+       LASSERT(conn->ibc_nsends_posted <=
+               kiblnd_concurrent_sends(ver, ni));
         LASSERT (!IBLND_OOB_CAPABLE(ver) ||
                  conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver));
         LASSERT (conn->ibc_reserved_credits >= 0);
@@ -2327,12 +2330,12 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
         }
 
        if (reqmsg->ibm_u.connparams.ibcp_queue_depth >
-           IBLND_MSG_QUEUE_SIZE(version)) {
+           kiblnd_msg_queue_size(version, ni)) {
                CERROR("Can't accept conn from %s, queue depth too large: "
                       " %d (<=%d wanted)\n",
                       libcfs_nid2str(nid),
                       reqmsg->ibm_u.connparams.ibcp_queue_depth,
-                      IBLND_MSG_QUEUE_SIZE(version));
+                      kiblnd_msg_queue_size(version, ni));
 
                if (version == IBLND_MSG_VERSION)
                        rej.ibr_why = IBLND_REJECT_MSG_QUEUE_SIZE;
@@ -2341,27 +2344,28 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
        }
 
        if (reqmsg->ibm_u.connparams.ibcp_max_frags >
-           IBLND_RDMA_FRAGS(version)) {
+           kiblnd_rdma_frags(version, ni)) {
                CWARN("Can't accept conn from %s (version %x): "
                      "max_frags %d too large (%d wanted)\n",
-                      libcfs_nid2str(nid), version,
-                      reqmsg->ibm_u.connparams.ibcp_max_frags,
-                      IBLND_RDMA_FRAGS(version));
+                     libcfs_nid2str(nid), version,
+                     reqmsg->ibm_u.connparams.ibcp_max_frags,
+                     kiblnd_rdma_frags(version, ni));
 
                if (version >= IBLND_MSG_VERSION)
                        rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
 
                goto failed;
        } else if (reqmsg->ibm_u.connparams.ibcp_max_frags <
-                  IBLND_RDMA_FRAGS(version) && net->ibn_fmr_ps == NULL) {
+                  kiblnd_rdma_frags(version, ni) &&
+                  net->ibn_fmr_ps == NULL) {
                CWARN("Can't accept conn from %s (version %x): "
                      "max_frags %d incompatible without FMR pool "
                      "(%d wanted)\n",
                      libcfs_nid2str(nid), version,
                      reqmsg->ibm_u.connparams.ibcp_max_frags,
-                     IBLND_RDMA_FRAGS(version));
+                     kiblnd_rdma_frags(version, ni));
 
-               if (version >= IBLND_MSG_VERSION)
+               if (version == IBLND_MSG_VERSION)
                        rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
 
                goto failed;
@@ -2519,12 +2523,12 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
         if (ni != NULL)
                 lnet_ni_decref(ni);
 
-        rej.ibr_version = version;
-        rej.ibr_cp.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE(version);
-        rej.ibr_cp.ibcp_max_frags   = IBLND_RDMA_FRAGS(version);
-        kiblnd_reject(cmid, &rej);
+       rej.ibr_version = version;
+       rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
+       rej.ibr_cp.ibcp_max_frags   = kiblnd_rdma_frags(version, ni);
+       kiblnd_reject(cmid, &rej);
 
-        return -ECONNREFUSED;
+       return -ECONNREFUSED;
 }
 
 static void
@@ -2570,12 +2574,15 @@ kiblnd_check_reconnect(kib_conn_t *conn, int version,
                 reason = "Unknown";
                 break;
 
-       case IBLND_REJECT_RDMA_FRAGS:
+       case IBLND_REJECT_RDMA_FRAGS: {
+               struct lnet_ioctl_config_lnd_tunables *tunables;
+
                if (!cp) {
                        reason = "can't negotiate max frags";
                        goto out;
                }
-               if (*kiblnd_tunables.kib_map_on_demand == 0) {
+               tunables = peer->ibp_ni->ni_lnd_tunables;
+               if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) {
                        reason = "map_on_demand must be enabled";
                        goto out;
                }
@@ -2587,7 +2594,7 @@ kiblnd_check_reconnect(kib_conn_t *conn, int version,
                peer->ibp_max_frags = frag_num;
                reason = "rdma fragments";
                break;
-
+       }
        case IBLND_REJECT_MSG_QUEUE_SIZE:
                if (!cp) {
                        reason = "can't negotiate queue depth";
index cdddcd7..61254c0 100644 (file)
@@ -135,7 +135,6 @@ static int dev_failover = 0;
 CFS_MODULE_PARM(dev_failover, "i", int, 0444,
                "HCA failover for bonding (0 off, 1 on, other values reserved)");
 
-
 static int require_privileged_port = 0;
 CFS_MODULE_PARM(require_privileged_port, "i", int, 0644,
                 "require privileged port when accepting connection");
@@ -151,25 +150,17 @@ kib_tunables_t kiblnd_tunables = {
         .kib_timeout                = &timeout,
         .kib_keepalive              = &keepalive,
         .kib_ntx                    = &ntx,
-        .kib_credits                = &credits,
-        .kib_peertxcredits          = &peer_credits,
-        .kib_peercredits_hiw        = &peer_credits_hiw,
-        .kib_peerrtrcredits         = &peer_buffer_credits,
-        .kib_peertimeout            = &peer_timeout,
         .kib_default_ipif           = &ipif_name,
         .kib_retry_count            = &retry_count,
         .kib_rnr_retry_count        = &rnr_retry_count,
-        .kib_concurrent_sends       = &concurrent_sends,
         .kib_ib_mtu                 = &ib_mtu,
-        .kib_map_on_demand          = &map_on_demand,
-        .kib_fmr_pool_size          = &fmr_pool_size,
-        .kib_fmr_flush_trigger      = &fmr_flush_trigger,
-        .kib_fmr_cache              = &fmr_cache,
         .kib_require_priv_port      = &require_privileged_port,
        .kib_use_priv_port          = &use_privileged_port,
        .kib_nscheds                = &nscheds
 };
 
+static struct lnet_ioctl_config_o2iblnd_tunables default_tunables;
+
 #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
 
 static char ipif_basename_space[32];
@@ -388,66 +379,139 @@ static void
 kiblnd_sysctl_fini (void)
 {
 }
-
 #endif
 
+/* # messages/RDMAs in-flight */
 int
-kiblnd_tunables_init (void)
+kiblnd_msg_queue_size(int version, lnet_ni_t *ni)
 {
-        if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
-                CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
-                       *kiblnd_tunables.kib_ib_mtu);
-                return -EINVAL;
-        }
-
-        if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT)
-                *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT;
-
-        if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX)
-                *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX;
-
-        if (*kiblnd_tunables.kib_peertxcredits > *kiblnd_tunables.kib_credits)
-                *kiblnd_tunables.kib_peertxcredits = *kiblnd_tunables.kib_credits;
-
-        if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2)
-                *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2;
-
-        if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits)
-                *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1;
-
-        if (*kiblnd_tunables.kib_map_on_demand < 0 ||
-            *kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS)
-                *kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */
-
-        if (*kiblnd_tunables.kib_map_on_demand == 1)
-                *kiblnd_tunables.kib_map_on_demand = 2; /* don't make sense to create map if only one fragment */
-
-        if (*kiblnd_tunables.kib_concurrent_sends == 0) {
-                if (*kiblnd_tunables.kib_map_on_demand > 0 &&
-                    *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
-                        *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2;
-                else
-                        *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits);
-        }
-
-        if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2)
-                *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2;
-
-        if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2)
-                *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2;
+       if (version == IBLND_MSG_VERSION_1)
+               return IBLND_MSG_QUEUE_SIZE_V1;
+       else if (ni)
+               return ni->ni_peertxcredits;
+       else
+               return peer_credits;
+}
 
-        if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) {
-                CWARN("Concurrent sends %d is lower than message queue size: %d, "
-                      "performance may drop slightly.\n",
-                      *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits);
-        }
+int
+kiblnd_tunables_setup(lnet_ni_t *ni)
+{
+       struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+
+       /*
+        * If no tunables were specified for this NI, allocate them and
+        * start from a copy of default_tunables.
+        */
+       if (!ni->ni_lnd_tunables) {
+               LIBCFS_ALLOC(ni->ni_lnd_tunables,
+                            sizeof(*ni->ni_lnd_tunables));
+               if (!ni->ni_lnd_tunables)
+                       return -ENOMEM;
+
+               memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib,
+                      &default_tunables, sizeof(*tunables));
+       }
+       tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+
+       /* Current API version */
+       tunables->lnd_version = 0;
+       /* Reject an ib_mtu that kiblnd_translate_mtu() cannot translate. */
+       if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
+               CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
+                      *kiblnd_tunables.kib_ib_mtu);
+               return -EINVAL;
+       }
+       /* NI values still at zero take the global default values. */
+       if (!ni->ni_peertimeout)
+               ni->ni_peertimeout = peer_timeout;
+
+       if (!ni->ni_maxtxcredits)
+               ni->ni_maxtxcredits = credits;
+
+       if (!ni->ni_peertxcredits)
+               ni->ni_peertxcredits = peer_credits;
+
+       if (!ni->ni_peerrtrcredits)
+               ni->ni_peerrtrcredits = peer_buffer_credits;
+       /* Clamp peer tx credits to the IBLND limits, then to credits. */
+       if (ni->ni_peertxcredits < IBLND_CREDITS_DEFAULT)
+               ni->ni_peertxcredits = IBLND_CREDITS_DEFAULT;
+
+       if (ni->ni_peertxcredits > IBLND_CREDITS_MAX)
+               ni->ni_peertxcredits = IBLND_CREDITS_MAX;
+
+       if (ni->ni_peertxcredits > credits)
+               ni->ni_peertxcredits = credits;
+       /* Keep the high-water mark in [peertxcredits/2, peertxcredits-1]. */
+       if (!tunables->lnd_peercredits_hiw)
+               tunables->lnd_peercredits_hiw = peer_credits_hiw;
+
+       if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2)
+               tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2;
+
+       if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits)
+               tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1;
+
+       if (tunables->lnd_map_on_demand < 0 ||
+           tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
+               /* out of range: disable map-on-demand */
+               tunables->lnd_map_on_demand = 0;
+       }
+
+       if (tunables->lnd_map_on_demand == 1) {
+               /* it makes no sense to create a map for a single fragment */
+               tunables->lnd_map_on_demand = 2;
+       }
+       /* When unset, derive concurrent sends from the peer tx credits. */
+       if (tunables->lnd_concurrent_sends == 0) {
+               if (tunables->lnd_map_on_demand > 0 &&
+                   tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) {
+                       tunables->lnd_concurrent_sends =
+                                               ni->ni_peertxcredits * 2;
+               } else {
+                       tunables->lnd_concurrent_sends = ni->ni_peertxcredits;
+               }
+       }
+       /* Keep concurrent sends in [peertxcredits/2, peertxcredits*2]. */
+       if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2)
+               tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2;
+
+       if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2)
+               tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2;
+
+       if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) {
+               CWARN("Concurrent sends %d is lower than message "
+                     "queue size: %d, performance may drop slightly.\n",
+                     tunables->lnd_concurrent_sends, ni->ni_peertxcredits);
+       }
+       /* FMR settings still at zero take the global default values. */
+       if (!tunables->lnd_fmr_pool_size)
+               tunables->lnd_fmr_pool_size = fmr_pool_size;
+       if (!tunables->lnd_fmr_flush_trigger)
+               tunables->lnd_fmr_flush_trigger = fmr_flush_trigger;
+       if (!tunables->lnd_fmr_cache)
+               tunables->lnd_fmr_cache = fmr_cache;
+
+       return 0;
+}
 
-        kiblnd_sysctl_init();
-        return 0;
+/*
+ * Seed default_tunables (the template that kiblnd_tunables_setup() copies
+ * for an NI without its own tunables) from the global parameter values,
+ * then run kiblnd_sysctl_init().  Always returns 0.
+ */
+int
+kiblnd_tunables_init(void)
+{
+       default_tunables.lnd_version = 0;
+       /* each assignment is its own statement (was joined by a stray ',') */
+       default_tunables.lnd_peercredits_hiw = peer_credits_hiw;
+       default_tunables.lnd_map_on_demand = map_on_demand;
+       default_tunables.lnd_concurrent_sends = concurrent_sends;
+       default_tunables.lnd_fmr_pool_size = fmr_pool_size;
+       default_tunables.lnd_fmr_flush_trigger = fmr_flush_trigger;
+       default_tunables.lnd_fmr_cache = fmr_cache;
+
+       kiblnd_sysctl_init();
+       return 0;
 }
 
 void
-kiblnd_tunables_fini (void)
+kiblnd_tunables_fini(void)
 {
-        kiblnd_sysctl_fini();
+       kiblnd_sysctl_fini();
 }
index c961893..74a0055 100644 (file)
@@ -1223,9 +1223,9 @@ lnet_shutdown_lndni(struct lnet_ni *ni)
 }
 
 static int
-lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
-                  __s32 peer_cr, __s32 peer_buf_cr, __s32 credits)
+lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
 {
+       struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
        int                     rc = -EINVAL;
        __u32                   lnd_type;
        lnd_t                   *lnd;
@@ -1290,6 +1290,21 @@ lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
 
        ni->ni_lnd = lnd;
 
+       if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
+               lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
+
+       if (lnd_tunables != NULL) {
+               LIBCFS_ALLOC(ni->ni_lnd_tunables,
+                            sizeof(*ni->ni_lnd_tunables));
+               if (ni->ni_lnd_tunables == NULL) {
+                       LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
+                       rc = -ENOMEM;
+                       goto failed0;
+               }
+               memcpy(ni->ni_lnd_tunables, lnd_tunables,
+                      sizeof(*ni->ni_lnd_tunables));
+       }
+
        rc = (lnd->lnd_startup)(ni);
 
        LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
@@ -1305,20 +1320,29 @@ lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
 
        /* If given some LND tunable parameters, parse those now to
         * override the values in the NI structure. */
-       if (peer_buf_cr >= 0)
-               ni->ni_peerrtrcredits = peer_buf_cr;
-       if (peer_timeout >= 0)
-               ni->ni_peertimeout = peer_timeout;
+       if (conf && conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0) {
+               ni->ni_peerrtrcredits =
+                       conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
+       }
+       if (conf && conf->cfg_config_u.cfg_net.net_peer_timeout >= 0) {
+               ni->ni_peertimeout =
+                       conf->cfg_config_u.cfg_net.net_peer_timeout;
+       }
+
        /*
         * TODO
         * Note: For now, don't allow the user to change
         * peertxcredits as this number is used in the
         * IB LND to control queue depth.
-        * if (peer_cr != -1)
-        *      ni->ni_peertxcredits = peer_cr;
+        *
+        * if (conf && conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1)
+        *      ni->ni_peertxcredits =
+        *              conf->cfg_config_u.cfg_net.net_peer_tx_credits;
         */
-       if (credits >= 0)
-               ni->ni_maxtxcredits = credits;
+       if (conf && conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0) {
+               ni->ni_maxtxcredits =
+                       conf->cfg_config_u.cfg_net.net_max_tx_credits;
+       }
 
        LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
 
@@ -1379,7 +1403,7 @@ lnet_startup_lndnis(struct list_head *nilist)
        while (!list_empty(nilist)) {
                ni = list_entry(nilist->next, lnet_ni_t, ni_list);
                list_del(&ni->ni_list);
-               rc = lnet_startup_lndni(ni, -1, -1, -1, -1);
+               rc = lnet_startup_lndni(ni, NULL);
 
                if (rc < 0)
                        goto failed;
@@ -1657,17 +1681,18 @@ EXPORT_SYMBOL(LNetNIFini);
  * \param[out] net_config      Network configuration
  */
 static void
-lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid,
-                 int *peer_timeout, int *peer_tx_credits,
-                 int *peer_rtr_credits, int *max_tx_credits,
-                 struct lnet_ioctl_net_config *net_config)
+lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
 {
+       struct lnet_ioctl_net_config *net_config;
+       struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
+       size_t min_size, tunable_size = 0;
        int i;
 
-       if (ni == NULL)
+       if (!ni || !config)
                return;
 
-       if (net_config == NULL)
+       net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
+       if (!net_config)
                return;
 
        CLASSERT(ARRAY_SIZE(ni->ni_interfaces) ==
@@ -1683,11 +1708,11 @@ lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid,
                }
        }
 
-       *nid = ni->ni_nid;
-       *peer_timeout = ni->ni_peertimeout;
-       *peer_tx_credits = ni->ni_peertxcredits;
-       *peer_rtr_credits = ni->ni_peerrtrcredits;
-       *max_tx_credits = ni->ni_maxtxcredits;
+       config->cfg_nid = ni->ni_nid;
+       config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout;
+       config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits;
+       config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits;
+       config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits;
 
        net_config->ni_status = ni->ni_status->ns_status;
 
@@ -1697,19 +1722,44 @@ lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid,
             i++)
                net_config->ni_cpts[i] = ni->ni_cpts[i];
 
-       *cpt_count = ni->ni_ncpts;
+       config->cfg_ncpts = ni->ni_ncpts;
+
+       /*
+        * See if user land tools sent in a newer and larger version
+        * of struct lnet_tunables than what the kernel uses.
+        * Anything in ioc_len beyond the config header and the net config
+        * is treated as room for the LND tunables.
+        */
+       min_size = sizeof(*config) + sizeof(*net_config);
+
+       if (config->cfg_hdr.ioc_len > min_size)
+               tunable_size = config->cfg_hdr.ioc_len - min_size;
+
+       /* Don't copy too much data to user space */
+       min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables));
+       lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
+
+       if (ni->ni_lnd_tunables && lnd_cfg && min_size) {
+               memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size);
+               config->cfg_config_u.cfg_net.net_interface_count = 1;
+
+               /*
+                * Tell user land that kernel side has less data: shrink
+                * ioc_len by the excess over the kernel's tunables struct.
+                * This must be the size of the structure, not of the
+                * pointer to it, to match the comparison above.
+                */
+               if (tunable_size > sizeof(*ni->ni_lnd_tunables)) {
+                       min_size = tunable_size -
+                                  sizeof(*ni->ni_lnd_tunables);
+                       config->cfg_hdr.ioc_len -= min_size;
+               }
+       }
 }
 
-int
-lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout,
-                   int *peer_tx_credits, int *peer_rtr_credits,
-                   int *max_tx_credits,
-                   struct lnet_ioctl_net_config *net_config)
+static int
+lnet_get_net_config(struct lnet_ioctl_config_data *config)
 {
-       struct lnet_ni          *ni;
-       struct list_head        *tmp;
-       int                     cpt;
-       int                     rc = -ENOENT;
+       struct lnet_ni *ni;
+       struct list_head *tmp;
+       int idx = config->cfg_count;
+       int rc = -ENOENT;
+       int cpt;
+
+       if (unlikely(!config->cfg_bulk))
+               return -EINVAL;
 
        cpt = lnet_net_lock_current();
 
@@ -1718,9 +1768,7 @@ lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout,
                if (idx-- == 0) {
                        rc = 0;
                        lnet_ni_lock(ni);
-                       lnet_fill_ni_info(ni, cpt_count, nid, peer_timeout,
-                                         peer_tx_credits, peer_rtr_credits,
-                                         max_tx_credits, net_config);
+                       lnet_fill_ni_info(ni, config);
                        lnet_ni_unlock(ni);
                        break;
                }
@@ -1731,10 +1779,9 @@ lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout,
 }
 
 int
-lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
-               __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr,
-               __s32 credits)
+lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
 {
+       char                    *nets = conf->cfg_config_u.cfg_net.net_intf;
        lnet_ping_info_t        *pinfo;
        lnet_handle_md_t        md_handle;
        struct lnet_ni          *ni;
@@ -1777,8 +1824,7 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
 
        list_del_init(&ni->ni_list);
 
-       rc = lnet_startup_lndni(ni, peer_timeout, peer_cr,
-                               peer_buf_cr, credits);
+       rc = lnet_startup_lndni(ni, conf);
        if (rc != 0)
                goto failed1;
 
@@ -1926,31 +1972,14 @@ LNetCtl(unsigned int cmd, void *arg)
                                        rtr_priority);
 
        case IOC_LIBCFS_GET_NET: {
-               struct lnet_ioctl_net_config *net_config;
-               size_t total = sizeof(*config) + sizeof(*net_config);
-
+               size_t total = sizeof(*config) +
+                              sizeof(struct lnet_ioctl_net_config);
                config = arg;
 
                if (config->cfg_hdr.ioc_len < total)
                        return -EINVAL;
 
-               net_config = (struct lnet_ioctl_net_config *)
-                       config->cfg_bulk;
-               if (net_config == NULL)
-                       return -EINVAL;
-
-               return lnet_get_net_config(config->cfg_count,
-                                          &config->cfg_ncpts,
-                                          &config->cfg_nid,
-                                          &config->cfg_config_u.
-                                               cfg_net.net_peer_timeout,
-                                          &config->cfg_config_u.cfg_net.
-                                               net_peer_tx_credits,
-                                          &config->cfg_config_u.cfg_net.
-                                               net_peer_rtr_credits,
-                                          &config->cfg_config_u.cfg_net.
-                                               net_max_tx_credits,
-                                          net_config);
+               return lnet_get_net_config(config);
        }
 
        case IOC_LIBCFS_GET_LNET_STATS:
index b00267d..1105231 100644 (file)
@@ -108,6 +108,9 @@ lnet_ni_free(struct lnet_ni *ni)
        if (ni->ni_cpts != NULL)
                cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
 
+       if (ni->ni_lnd_tunables != NULL)
+               LIBCFS_FREE(ni->ni_lnd_tunables, sizeof(*ni->ni_lnd_tunables));
+
        for (i = 0; i < LNET_MAX_INTERFACES &&
                    ni->ni_interfaces[i] != NULL; i++) {
                LIBCFS_FREE(ni->ni_interfaces[i],
index 7cabfde..bc36586 100644 (file)
@@ -103,16 +103,7 @@ lnet_dyn_configure(struct libcfs_ioctl_hdr *hdr)
 
        LNET_MUTEX_LOCK(&lnet_config_mutex);
        if (the_lnet.ln_niinit_self)
-               rc = lnet_dyn_add_ni(LNET_PID_LUSTRE,
-                                    conf->cfg_config_u.cfg_net.net_intf,
-                                    conf->cfg_config_u.cfg_net.
-                                       net_peer_timeout,
-                                    conf->cfg_config_u.cfg_net.
-                                       net_peer_tx_credits,
-                                    conf->cfg_config_u.cfg_net.
-                                       net_peer_rtr_credits,
-                                    conf->cfg_config_u.cfg_net.
-                                       net_max_tx_credits);
+               rc = lnet_dyn_add_ni(LNET_PID_LUSTRE, conf);
        else
                rc = -EINVAL;
        LNET_MUTEX_UNLOCK(&lnet_config_mutex);
index 9b6db63..c88135e 100644 (file)
@@ -30,9 +30,10 @@ lib_LTLIBRARIES = liblnetconfig.la
 
 CYAML := $(top_builddir)/lnet/utils/cyaml/cyaml.c \
         $(top_builddir)/lnet/utils/cyaml/cyaml.h
-liblnetconfig_la_SOURCES  = liblnetconfig.c liblnetconfig.h $(CYAML)
+liblnetconfig_la_SOURCES  = liblnetconfig.c liblnetconfig.h \
+                           liblnetconfig_lnd.c liblnd.h $(CYAML)
 liblnetconfig_la_CPPFLAGS = -D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64 \
                            -DLUSTRE_UTILS=1 -I$(top_builddir)/lnet/utils/cyaml
-liblnetconfig_la_LDFLAGS = -L$(top_builddir)/libcfs/libcfs -version-info 1:0:0
+liblnetconfig_la_LDFLAGS = -L$(top_builddir)/libcfs/libcfs -version-info 1:1:0
 
 EXTRA_DIST =
diff --git a/lnet/utils/lnetconfig/liblnd.h b/lnet/utils/lnetconfig/liblnd.h
new file mode 100644 (file)
index 0000000..79e9fdb
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * LGPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * LGPL HEADER END
+ *
+ * Copyright (c) 2015, James Simmons <jsimmons@infradead.org>
+ */
+
+#ifndef LIB_LND_CONFIG_API_H
+#define LIB_LND_CONFIG_API_H
+
+#include <lnet/lib-dlc.h>
+#include "cyaml.h"
+
+int
+lustre_interface_show_net(struct cYAML *interfaces, unsigned int index,
+                         bool detail, struct lnet_ioctl_config_data *data,
+                         struct lnet_ioctl_net_config *net_config);
+
+void
+lustre_interface_parse(struct cYAML *lndparams, const char *dev_name,
+                      struct lnet_ioctl_config_lnd_tunables *lnd_cfg);
+
+#endif /* LIB_LND_CONFIG_API_H */
index da7fe62..e2ef2b6 100644 (file)
@@ -43,7 +43,7 @@
 #include <libcfs/util/ioctl.h>
 #include <lnet/lnetctl.h>
 #include <lnet/socklnd.h>
-#include <lnet/lnet.h>
+#include "liblnd.h"
 #include "liblnetconfig.h"
 #include "cyaml.h"
 
@@ -443,15 +443,22 @@ out:
 int lustre_lnet_config_net(char *net, char *intf, char *ip2net,
                           int peer_to, int peer_cr, int peer_buf_cr,
                           int credits, char *smp, int seq_no,
+                          struct lnet_ioctl_config_lnd_tunables *lnd_tunables,
                           struct cYAML **err_rc)
 {
-       struct lnet_ioctl_config_data data;
+       struct lnet_ioctl_config_lnd_tunables *lnd = NULL;
+       struct lnet_ioctl_config_data *data;
+       size_t ioctl_size = sizeof(*data);
        char buf[LNET_MAX_STR_LEN];
        int rc = LUSTRE_CFG_RC_NO_ERR;
        char err_str[LNET_MAX_STR_LEN];
 
        snprintf(err_str, sizeof(err_str), "\"success\"");
 
+       /* No need to register lo */
+       if (net != NULL && !strcmp(net, "lo"))
+               return 0;
+
        if (ip2net == NULL && (intf == NULL || net == NULL)) {
                snprintf(err_str,
                         sizeof(err_str),
@@ -481,26 +488,41 @@ int lustre_lnet_config_net(char *net, char *intf, char *ip2net,
                goto out;
        }
 
+       /* The ioctl buffer is the config header plus optional LND tunables */
+       if (lnd_tunables != NULL)
+               ioctl_size += sizeof(*lnd_tunables);
+
+       data = calloc(1, ioctl_size);
+       if (data == NULL) {
+               /*
+                * Set the error explicitly: rc and err_str were initialised
+                * to the "success" values, so a bare jump to 'out' would
+                * report this failure as success.
+                */
+               rc = LUSTRE_CFG_RC_OUT_OF_MEM;
+               snprintf(err_str, sizeof(err_str), "\"out of memory\"");
+               goto out;
+       }
+
        if (ip2net == NULL)
                snprintf(buf, sizeof(buf) - 1, "%s(%s)%s",
                        net, intf,
                        (smp) ? smp : "");
 
-       LIBCFS_IOC_INIT_V2(data, cfg_hdr);
-       strncpy(data.cfg_config_u.cfg_net.net_intf,
+       LIBCFS_IOC_INIT_V2(*data, cfg_hdr);
+       strncpy(data->cfg_config_u.cfg_net.net_intf,
                (ip2net != NULL) ? ip2net : buf, sizeof(buf));
-       data.cfg_config_u.cfg_net.net_peer_timeout = peer_to;
-       data.cfg_config_u.cfg_net.net_peer_tx_credits = peer_cr;
-       data.cfg_config_u.cfg_net.net_peer_rtr_credits = peer_buf_cr;
-       data.cfg_config_u.cfg_net.net_max_tx_credits = credits;
+       data->cfg_config_u.cfg_net.net_peer_timeout = peer_to;
+       data->cfg_config_u.cfg_net.net_peer_tx_credits = peer_cr;
+       data->cfg_config_u.cfg_net.net_peer_rtr_credits = peer_buf_cr;
+       data->cfg_config_u.cfg_net.net_max_tx_credits = credits;
+       /* Add in tunable settings if available */
+       if (lnd_tunables != NULL) {
+               lnd = (struct lnet_ioctl_config_lnd_tunables *)data->cfg_bulk;
+
+               data->cfg_hdr.ioc_len = ioctl_size;
+               memcpy(lnd, lnd_tunables, sizeof(*lnd_tunables));
+       }
 
-       rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_NET, &data);
+       rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_NET, data);
        if (rc < 0) {
                rc = -errno;
                snprintf(err_str,
                         sizeof(err_str),
                         "\"cannot add network: %s\"", strerror(errno));
        }
+       free(data);
 
 out:
        cYAML_build_error(rc, seq_no, ADD_CMD, "net", err_str, err_rc);
@@ -556,23 +578,25 @@ int lustre_lnet_show_net(char *nw, int detail, int seq_no,
                         struct cYAML **show_rc, struct cYAML **err_rc)
 {
        char *buf;
+       struct lnet_ioctl_config_lnd_tunables *lnd_cfg;
        struct lnet_ioctl_config_data *data;
        struct lnet_ioctl_net_config *net_config;
        __u32 net = LNET_NIDNET(LNET_NID_ANY);
        int rc = LUSTRE_CFG_RC_OUT_OF_MEM, i, j;
        int l_errno = 0;
-       struct cYAML *root = NULL, *tunables = NULL,
-               *net_node = NULL, *interfaces = NULL,
-               *item = NULL, *first_seq = NULL;
+       struct cYAML *root = NULL, *tunables = NULL, *net_node = NULL,
+               *interfaces = NULL, *item = NULL, *first_seq = NULL;
        int str_buf_len = LNET_MAX_SHOW_NUM_CPT * 2;
        char str_buf[str_buf_len];
        char *pos;
        char err_str[LNET_MAX_STR_LEN];
        bool exist = false;
+       size_t buf_len;
 
        snprintf(err_str, sizeof(err_str), "\"out of memory\"");
 
-       buf = calloc(1, sizeof(*data) + sizeof(*net_config));
+       buf_len = sizeof(*data) + sizeof(*net_config) + sizeof(*lnd_cfg);
+       buf = calloc(1, buf_len);
        if (buf == NULL)
                goto out;
 
@@ -600,15 +624,14 @@ int lustre_lnet_show_net(char *nw, int detail, int seq_no,
        for (i = 0;; i++) {
                pos = str_buf;
 
-               memset(buf, 0, sizeof(*data) + sizeof(*net_config));
+               memset(buf, 0, buf_len);
 
                LIBCFS_IOC_INIT_V2(*data, cfg_hdr);
                /*
                 * set the ioc_len to the proper value since INIT assumes
                 * size of data
                 */
-               data->cfg_hdr.ioc_len = sizeof(struct lnet_ioctl_config_data) +
-                 sizeof(struct lnet_ioctl_net_config);
+               data->cfg_hdr.ioc_len = buf_len;
                data->cfg_count = i;
 
                rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_NET, data);
@@ -636,8 +659,7 @@ int lustre_lnet_show_net(char *nw, int detail, int seq_no,
                if (first_seq == NULL)
                        first_seq = item;
 
-               if (cYAML_create_string(item,
-                                       "net",
+               if (cYAML_create_string(item, "net",
                                        libcfs_net2str(
                                                LNET_NIDNET(data->cfg_nid)))
                    == NULL)
@@ -647,8 +669,7 @@ int lustre_lnet_show_net(char *nw, int detail, int seq_no,
                                        libcfs_nid2str(data->cfg_nid)) == NULL)
                        goto out;
 
-               if (cYAML_create_string(item,
-                                       "status",
+               if (cYAML_create_string(item, "status",
                                        (net_config->ni_status ==
                                          LNET_NI_STATUS_UP) ?
                                            "up" : "down") == NULL)
@@ -662,15 +683,10 @@ int lustre_lnet_show_net(char *nw, int detail, int seq_no,
                                goto out;
 
                        for (j = 0; j < LNET_MAX_INTERFACES; j++) {
-                               if (strlen(net_config->ni_interfaces[j]) > 0) {
-                                       snprintf(str_buf,
-                                                sizeof(str_buf), "%d", j);
-                                       if (cYAML_create_string(interfaces,
-                                               str_buf,
-                                               net_config->ni_interfaces[j]) ==
-                                           NULL)
-                                               goto out;
-                               }
+                               if (lustre_interface_show_net(interfaces, j,
+                                                             detail, data,
+                                                             net_config) < 0)
+                                       goto out;
                        }
                }
 
@@ -683,23 +699,23 @@ int lustre_lnet_show_net(char *nw, int detail, int seq_no,
 
                        if (cYAML_create_number(tunables, "peer_timeout",
                                                data->cfg_config_u.cfg_net.
-                                                net_peer_timeout) == NULL)
+                                               net_peer_timeout) == NULL)
                                goto out;
 
                        if (cYAML_create_number(tunables, "peer_credits",
                                                data->cfg_config_u.cfg_net.
-                                                 net_peer_tx_credits) == NULL)
+                                               net_peer_tx_credits) == NULL)
                                goto out;
 
                        if (cYAML_create_number(tunables,
                                                "peer_buffer_credits",
                                                data->cfg_config_u.cfg_net.
-                                                 net_peer_rtr_credits) == NULL)
+                                               net_peer_rtr_credits) == NULL)
                                goto out;
 
                        if (cYAML_create_number(tunables, "credits",
                                                data->cfg_config_u.cfg_net.
-                                                 net_max_tx_credits) == NULL)
+                                               net_max_tx_credits) == NULL)
                                goto out;
 
                        /* out put the CPTs in the format: "[x,x,x,...]" */
@@ -1263,6 +1279,8 @@ static int handle_yaml_config_net(struct cYAML *tree, struct cYAML **show_rc,
        struct cYAML *net, *intf, *tunables, *seq_no,
              *peer_to = NULL, *peer_buf_cr = NULL, *peer_cr = NULL,
              *credits = NULL, *ip2net = NULL, *smp = NULL, *child;
+       struct lnet_ioctl_config_lnd_tunables *lnd_tunables_p = NULL;
+       struct lnet_ioctl_config_lnd_tunables lnd_tunables;
        char devs[LNET_MAX_STR_LEN];
        char *loc = devs;
        int size = LNET_MAX_STR_LEN;
@@ -1276,6 +1294,11 @@ static int handle_yaml_config_net(struct cYAML *tree, struct cYAML **show_rc,
                /* grab all the interfaces */
                child = intf->cy_child;
                while (child != NULL && size > 0) {
+                       struct cYAML *lnd_params;
+
+                       if (child->cy_valuestring == NULL)
+                               goto ignore_child;
+
                        if (loc > devs)
                                num  = snprintf(loc, size, ",%s",
                                                child->cy_valuestring);
@@ -1285,6 +1308,17 @@ static int handle_yaml_config_net(struct cYAML *tree, struct cYAML **show_rc,
                        size -= num;
                        loc += num;
                        intf_found = true;
+
+                       lnd_params = cYAML_get_object_item(intf,
+                                                          "lnd tunables");
+                       if (lnd_params != NULL) {
+                               const char *dev_name = child->cy_valuestring;
+                               lnd_tunables_p = &lnd_tunables;
+
+                               lustre_interface_parse(lnd_params, dev_name,
+                                                      lnd_tunables_p);
+                       }
+ignore_child:
                        child = child->cy_next;
                }
        }
@@ -1310,6 +1344,7 @@ static int handle_yaml_config_net(struct cYAML *tree, struct cYAML **show_rc,
                                      (credits) ? credits->cy_valueint : -1,
                                      (smp) ? smp->cy_valuestring : NULL,
                                      (seq_no) ? seq_no->cy_valueint : -1,
+                                     lnd_tunables_p,
                                      err_rc);
 }
 
index 0ec300d..2584cee 100644 (file)
@@ -27,6 +27,8 @@
 #ifndef LIB_LNET_CONFIG_API_H
 #define LIB_LNET_CONFIG_API_H
 
+#include <lnet/lnet.h>
+
 #define LUSTRE_CFG_RC_NO_ERR                    0
 #define LUSTRE_CFG_RC_BAD_PARAM                        -1
 #define LUSTRE_CFG_RC_MISSING_PARAM            -2
@@ -120,11 +122,13 @@ int lustre_lnet_show_route(char *nw, char *gw,
  *   credits - network interface credits
  *   smp - cpu affinity
  *   seq_no - sequence number of the request
+ *   lnd_tunables - LND specific tunable parameters (callers may pass NULL)
  *   err_rc - [OUT] struct cYAML tree describing the error. Freed by caller
  */
 int lustre_lnet_config_net(char *net, char *intf, char *ip2net,
                           int peer_to, int peer_cr, int peer_buf_cr,
                           int credits, char *smp, int seq_no,
+                          struct lnet_ioctl_config_lnd_tunables *lnd_tunables,
                           struct cYAML **err_rc);
 
 /*
diff --git a/lnet/utils/lnetconfig/liblnetconfig_lnd.c b/lnet/utils/lnetconfig/liblnetconfig_lnd.c
new file mode 100644 (file)
index 0000000..96290e1
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ * LGPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * LGPL HEADER END
+ *
+ * Copyright (c) 2015, James Simmons
+ *
+ * Author:
+ *   James Simmons <jsimmons@infradead.org>
+ */
+#include <stdio.h>
+#include <string.h>
+#include <libcfs/util/ioctl.h>
+#include "liblnetconfig.h"
+#include "cyaml.h"
+
+/*
+ * Add the o2iblnd tunables held in \a tunables to \a lndparams, one
+ * number entry per tunable, in the order: peercredits_hiw, map_on_demand,
+ * concurrent_sends, fmr_pool_size, fmr_flush_trigger, fmr_cache.
+ *
+ * Returns 0 when every entry was created, or -1 as soon as
+ * cYAML_create_number() returns NULL.  Entries created before the failure
+ * are not removed by this function.
+ */
+static int
+lustre_ko2iblnd_show_net(struct cYAML *lndparams,
+                        struct lnet_ioctl_config_lnd_tunables *tunables)
+{
+       struct lnet_ioctl_config_o2iblnd_tunables *o2ib =
+               &tunables->lt_tun_u.lt_o2ib;
+
+       /* || short-circuits, so creation stops at the first failure */
+       if (cYAML_create_number(lndparams, "peercredits_hiw",
+                               o2ib->lnd_peercredits_hiw) == NULL ||
+           cYAML_create_number(lndparams, "map_on_demand",
+                               o2ib->lnd_map_on_demand) == NULL ||
+           cYAML_create_number(lndparams, "concurrent_sends",
+                               o2ib->lnd_concurrent_sends) == NULL ||
+           cYAML_create_number(lndparams, "fmr_pool_size",
+                               o2ib->lnd_fmr_pool_size) == NULL ||
+           cYAML_create_number(lndparams, "fmr_flush_trigger",
+                               o2ib->lnd_fmr_flush_trigger) == NULL ||
+           cYAML_create_number(lndparams, "fmr_cache",
+                               o2ib->lnd_fmr_cache) == NULL)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Add interface number \a index of \a net_config to the \a interfaces
+ * YAML node, keyed by the decimal value of \a index.
+ *
+ * An empty interface name is skipped and reported as success.  When
+ * \a detail is set, the network type derived from data->cfg_nid is
+ * O2IBLND and the net has at least one interface, an "lnd tunables"
+ * object is also created under \a interfaces and filled from
+ * net_config->cfg_bulk.
+ *
+ * Returns 0 on success (including the skipped cases) and -1 when a YAML
+ * node could not be created.
+ */
+int
+lustre_interface_show_net(struct cYAML *interfaces, unsigned int index,
+                         bool detail, struct lnet_ioctl_config_data *data,
+                         struct lnet_ioctl_net_config *net_config)
+{
+       /*
+        * Room for any 32-bit unsigned index (10 digits) plus the NUL.
+        * A 2 byte buffer only fits a single digit, so indexes 10..15
+        * were all truncated to the key "1".
+        */
+       char ni_index[12];
+
+       if (strlen(net_config->ni_interfaces[index]) == 0)
+               return 0;
+
+       /* index is unsigned, so format it with %u rather than %d */
+       snprintf(ni_index, sizeof(ni_index), "%u", index);
+       if (cYAML_create_string(interfaces, ni_index,
+                               net_config->ni_interfaces[index]) == NULL)
+               return -1;
+
+       if (detail) {
+               __u32 net = LNET_NETTYP(LNET_NIDNET(data->cfg_nid));
+               struct lnet_ioctl_config_lnd_tunables *lnd_cfg;
+               struct cYAML *lndparams;
+
+               /* only o2iblnd tunables are shown here */
+               if (data->cfg_config_u.cfg_net.net_interface_count == 0 ||
+                   net != O2IBLND)
+                       return 0;
+
+               lndparams = cYAML_create_object(interfaces, "lnd tunables");
+               if (lndparams == NULL)
+                       return -1;
+
+               /* cfg_bulk is interpreted as the LND tunables structure */
+               lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
+               if (lustre_ko2iblnd_show_net(lndparams, lnd_cfg) < 0)
+                       return -1;
+       }
+       return 0;
+}
+
+/*
+ * Fill the o2iblnd section of \a lnd_cfg from the YAML node \a lndparams.
+ *
+ * Each tunable is looked up by name with cYAML_get_object_item(); when
+ * the key is present its cy_valueint is stored, otherwise the field is
+ * set to 0.  peercredits_hiw is parsed as well, so that every tunable
+ * printed by lustre_ko2iblnd_show_net() can be read back and no o2iblnd
+ * field is left unset by this function.
+ */
+static void
+lustre_ko2iblnd_parse_net(struct cYAML *lndparams,
+                         struct lnet_ioctl_config_lnd_tunables *lnd_cfg)
+{
+       struct lnet_ioctl_config_o2iblnd_tunables *o2ib =
+               &lnd_cfg->lt_tun_u.lt_o2ib;
+       struct cYAML *item;
+
+       item = cYAML_get_object_item(lndparams, "peercredits_hiw");
+       o2ib->lnd_peercredits_hiw = (item != NULL) ? item->cy_valueint : 0;
+
+       item = cYAML_get_object_item(lndparams, "map_on_demand");
+       o2ib->lnd_map_on_demand = (item != NULL) ? item->cy_valueint : 0;
+
+       item = cYAML_get_object_item(lndparams, "concurrent_sends");
+       o2ib->lnd_concurrent_sends = (item != NULL) ? item->cy_valueint : 0;
+
+       item = cYAML_get_object_item(lndparams, "fmr_pool_size");
+       o2ib->lnd_fmr_pool_size = (item != NULL) ? item->cy_valueint : 0;
+
+       item = cYAML_get_object_item(lndparams, "fmr_flush_trigger");
+       o2ib->lnd_fmr_flush_trigger = (item != NULL) ? item->cy_valueint : 0;
+
+       item = cYAML_get_object_item(lndparams, "fmr_cache");
+       o2ib->lnd_fmr_cache = (item != NULL) ? item->cy_valueint : 0;
+}
+
+/*
+ * Parse the LND tunables in \a lndparams for the device \a dev_name.
+ *
+ * Only device names containing the substring "ib" are handed to the
+ * o2iblnd parser; for a NULL or any other name \a lnd_cfg is left
+ * untouched.
+ */
+void
+lustre_interface_parse(struct cYAML *lndparams, const char *dev_name,
+                      struct lnet_ioctl_config_lnd_tunables *lnd_cfg)
+{
+       if (dev_name == NULL)
+               return;
+
+       if (strstr(dev_name, "ib") == NULL)
+               return;
+
+       lustre_ko2iblnd_parse_net(lndparams, lnd_cfg);
+}
index 2a8db75..670aa02 100644 (file)
@@ -487,7 +487,7 @@ static int jt_add_net(int argc, char **argv)
        }
 
        rc = lustre_lnet_config_net(network, intf, ip2net, pto, pc, pbc,
-                                   cre, cpt, -1, &err_rc);
+                                   cre, cpt, -1, NULL, &err_rc);
 
        if (rc != LUSTRE_CFG_RC_NO_ERR)
                cYAML_print_tree2file(stderr, err_rc);