#define LNET_MAX_SHOW_NUM_CPT 128
#define LNET_UNDEFINED_HOPS ((__u32) -1)
+/* LND tunables common to every LND type, carried over the DLC ioctl
+ * interface. Field meanings mirror the per-LND module parameters they
+ * replace. */
+struct lnet_ioctl_config_lnd_cmn_tunables {
+ __u32 lct_version; /* structure version for user/kernel compatibility */
+ __u32 lct_peer_timeout; /* seconds before a silent peer is considered dead */
+ __u32 lct_peer_tx_credits; /* # concurrent sends to a single peer */
+ __u32 lct_peer_rtr_credits; /* # per-peer router buffer credits */
+ __u32 lct_max_tx_credits; /* total # concurrent sends */
+};
+
+/* o2iblnd-specific tunables, configurable per NI via the DLC ioctl
+ * interface (previously global module parameters). */
+struct lnet_ioctl_config_o2iblnd_tunables {
+ __u32 lnd_version; /* structure version */
+ __u32 lnd_peercredits_hiw; /* # when eagerly to return credits */
+ __u32 lnd_map_on_demand; /* map-on-demand if RD has more fragments
+ * than this value, 0 disable map-on-demand */
+ __u32 lnd_concurrent_sends; /* send work queue sizing */
+ __u32 lnd_fmr_pool_size; /* # FMRs in pool */
+ __u32 lnd_fmr_flush_trigger; /* when to trigger FMR flush */
+ __u32 lnd_fmr_cache; /* enable FMR pool cache? */
+ __u32 pad; /* explicit padding to an even number of __u32s */
+};
+
+/* Container passed between user and kernel space: the common tunables
+ * plus a union of per-LND-type tunables (only o2iblnd defined so far). */
+struct lnet_ioctl_config_lnd_tunables {
+ struct lnet_ioctl_config_lnd_cmn_tunables lt_cmn;
+ union {
+ struct lnet_ioctl_config_o2iblnd_tunables lt_o2ib;
+ } lt_tun_u;
+};
+
struct lnet_ioctl_net_config {
 char ni_interfaces[LNET_MAX_INTERFACES][LNET_MAX_STR_LEN];
 __u32 ni_status;
 __u32 ni_cpts[LNET_MAX_SHOW_NUM_CPT];
+ /* zero-length trailing array: variable-size payload appended after
+ * the fixed part; used to return struct lnet_ioctl_config_lnd_tunables
+ * to user land (see lnet_fill_ni_info()). */
+ char cfg_bulk[0];
};
#define LNET_TINY_BUF_IDX 0
__s32 net_peer_rtr_credits;
__s32 net_max_tx_credits;
__u32 net_cksum_algo;
- __u32 net_pad;
+ __u32 net_interface_count;
} cfg_net;
struct {
__u32 buf_enable;
void lnet_destroy_routes(void);
int lnet_get_route(int idx, __u32 *net, __u32 *hops,
lnet_nid_t *gateway, __u32 *alive, __u32 *priority);
-int lnet_get_net_config(int idx,
- __u32 *cpt_count,
- __u64 *nid,
- int *peer_timeout,
- int *peer_tx_credits,
- int *peer_rtr_cr,
- int *max_tx_credits,
- struct lnet_ioctl_net_config *net_config);
int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg);
struct libcfs_ioctl_handler {
void lnet_rtrpools_disable(void);
void lnet_rtrpools_free(int keep_pools);
lnet_remotenet_t *lnet_find_net_locked (__u32 net);
-int lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
- __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr,
- __s32 credits);
+int lnet_dyn_add_ni(lnet_pid_t requested_pid,
+ struct lnet_ioctl_config_data *conf);
int lnet_dyn_del_ni(__u32 net);
int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason);
int **ni_refs; /* percpt reference count */
long ni_last_alive; /* when I was last alive */
lnet_ni_status_t *ni_status; /* my health status */
+ /* per NI LND tunables */
+ struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables;
/* equivalent interfaces to use */
char *ni_interfaces[LNET_MAX_INTERFACES];
} lnet_ni_t;
peer->ibp_nid = nid;
peer->ibp_error = 0;
peer->ibp_last_alive = 0;
- peer->ibp_max_frags = IBLND_CFG_RDMA_FRAGS;
- peer->ibp_queue_depth = *kiblnd_tunables.kib_peertxcredits;
+ peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni);
+ peer->ibp_queue_depth = ni->ni_peertxcredits;
atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */
INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */
}
struct ib_mr *
-kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd,
+kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
 int negotiated_nfrags)
{
- __u16 nfrags = (negotiated_nfrags != -1) ?
- negotiated_nfrags : *kiblnd_tunables.kib_map_on_demand;
+ kib_net_t *net = ni->ni_data;
+ kib_hca_dev_t *hdev = net->ibn_dev->ibd_hdev;
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+ int mod;
+ __u16 nfrags;
+
+ /* map_on_demand now comes from the NI's private tunables rather than
+ * the global module parameter, hence the interface change from hdev
+ * to ni (hdev is derived from the NI's net). */
+ tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ mod = tunables->lnd_map_on_demand;
+ nfrags = (negotiated_nfrags != -1) ? negotiated_nfrags : mod;
 LASSERT(hdev->ibh_mrs != NULL);
- if (*kiblnd_tunables.kib_map_on_demand > 0 &&
- nfrags <= rd->rd_nfrags)
+ /* with map-on-demand enabled, small RDs are mapped on demand instead
+ * of using the global MR */
+ if (mod > 0 && nfrags <= rd->rd_nfrags)
 return NULL;
 return hdev->ibh_mrs;
}
}
-static int kiblnd_fmr_pool_size(int ncpts)
+static int
+kiblnd_fmr_pool_size(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
+ int ncpts)
{
- int size = *kiblnd_tunables.kib_fmr_pool_size / ncpts;
+ /* per-CPT share of the NI's configured FMR pool size */
+ int size = tunables->lnd_fmr_pool_size / ncpts;
+ /* never shrink below the compile-time minimum IBLND_FMR_POOL */
 return max(IBLND_FMR_POOL, size);
}
-static int kiblnd_fmr_flush_trigger(int ncpts)
+static int
+kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables,
+ int ncpts)
{
- int size = *kiblnd_tunables.kib_fmr_flush_trigger / ncpts;
+ /* per-CPT share of the configured flush trigger, with a floor of
+ * IBLND_FMR_POOL_FLUSH */
+ int size = tunables->lnd_fmr_flush_trigger / ncpts;
 return max(IBLND_FMR_POOL_FLUSH, size);
}
.dirty_watermark = fps->fps_flush_trigger,
.flush_function = NULL,
.flush_arg = NULL,
- .cache = !!*kiblnd_tunables.kib_fmr_cache};
+ .cache = !!fps->fps_cache };
int rc = 0;
fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd,
}
static int
-kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, kib_net_t *net,
- int pool_size, int flush_trigger)
+kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, int ncpts,
+ kib_net_t *net,
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables)
{
kib_fmr_pool_t *fpo;
int rc;
fps->fps_net = net;
fps->fps_cpt = cpt;
- fps->fps_pool_size = pool_size;
- fps->fps_flush_trigger = flush_trigger;
+
+ fps->fps_pool_size = kiblnd_fmr_pool_size(tunables, ncpts);
+ fps->fps_flush_trigger = kiblnd_fmr_flush_trigger(tunables, ncpts);
+ fps->fps_cache = tunables->lnd_fmr_cache;
+
spin_lock_init(&fps->fps_lock);
INIT_LIST_HEAD(&fps->fps_pool_list);
INIT_LIST_HEAD(&fps->fps_failed_pool_list);
}
static int
-kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
+kiblnd_net_init_pools(kib_net_t *net, lnet_ni_t *ni, __u32 *cpts, int ncpts)
{
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
unsigned long flags;
int cpt;
int rc;
int i;
+ tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+
read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
- if (*kiblnd_tunables.kib_map_on_demand == 0) {
+ if (tunables->lnd_map_on_demand == 0) {
read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
flags);
goto create_tx_pool;
read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- if (*kiblnd_tunables.kib_fmr_pool_size <
- *kiblnd_tunables.kib_ntx / 4) {
+ if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) {
CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n",
- *kiblnd_tunables.kib_fmr_pool_size,
+ tunables->lnd_fmr_pool_size,
*kiblnd_tunables.kib_ntx / 4);
rc = -EINVAL;
goto failed;
for (i = 0; i < ncpts; i++) {
cpt = (cpts == NULL) ? i : cpts[i];
- rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, net,
- kiblnd_fmr_pool_size(ncpts),
- kiblnd_fmr_flush_trigger(ncpts));
+ rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, ncpts,
+ net, tunables);
if (rc != 0) {
CERROR("Can't initialize FMR pool for CPT %d: %d\n",
cpt, rc);
do_gettimeofday(&tv);
net->ibn_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
- ni->ni_peertimeout = *kiblnd_tunables.kib_peertimeout;
- ni->ni_maxtxcredits = *kiblnd_tunables.kib_credits;
- ni->ni_peertxcredits = *kiblnd_tunables.kib_peertxcredits;
- ni->ni_peerrtrcredits = *kiblnd_tunables.kib_peerrtrcredits;
+ kiblnd_tunables_setup(ni);
if (ni->ni_interfaces[0] != NULL) {
/* Use the IPoIB interface specified in 'networks=' */
if (rc != 0)
goto failed;
- rc = kiblnd_net_init_pools(net, ni->ni_cpts, ni->ni_ncpts);
+ rc = kiblnd_net_init_pools(net, ni, ni->ni_cpts, ni->ni_ncpts);
if (rc != 0) {
CERROR("Failed to initialize NI pools: %d\n", rc);
goto failed;
int *kib_timeout; /* comms timeout (seconds) */
int *kib_keepalive; /* keepalive timeout (seconds) */
int *kib_ntx; /* # tx descs */
- int *kib_credits; /* # concurrent sends */
- int *kib_peertxcredits; /* # concurrent sends to 1 peer */
- int *kib_peerrtrcredits; /* # per-peer router buffer credits */
- int *kib_peercredits_hiw; /* # when eagerly to return credits */
- int *kib_peertimeout; /* seconds to consider peer dead */
char **kib_default_ipif; /* default IPoIB interface */
int *kib_retry_count;
int *kib_rnr_retry_count;
- int *kib_concurrent_sends; /* send work queue sizing */
int *kib_ib_mtu; /* IB MTU */
- int *kib_map_on_demand; /* map-on-demand if RD has more fragments
- * than this value, 0 disable map-on-demand */
- int *kib_fmr_pool_size; /* # FMRs in pool */
- int *kib_fmr_flush_trigger; /* When to trigger FMR flush */
- int *kib_fmr_cache; /* enable FMR pool cache? */
#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
struct ctl_table_header *kib_sysctl; /* sysctl interface */
#endif
#define IBLND_CREDITS_DEFAULT 8 /* default # of peer credits */
#define IBLND_CREDITS_MAX ((typeof(((kib_msg_t*) 0)->ibm_credits)) - 1) /* Max # of peer credits */
-#define IBLND_MSG_QUEUE_SIZE(v) ((v) == IBLND_MSG_VERSION_1 ? \
- IBLND_MSG_QUEUE_SIZE_V1 : \
- *kiblnd_tunables.kib_peertxcredits) /* # messages/RDMAs in-flight */
-#define IBLND_CREDITS_HIGHWATER(v) ((v) == IBLND_MSG_VERSION_1 ? \
- IBLND_CREDIT_HIGHWATER_V1 : \
- *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */
+/* when eagerly to return credits */
+#define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? \
+ IBLND_CREDIT_HIGHWATER_V1 : \
+ t->lnd_peercredits_hiw)
#ifdef HAVE_RDMA_CREATE_ID_4ARG
#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps, qpt)
#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps)
#endif
-static inline int
-kiblnd_concurrent_sends_v1(void)
-{
- if (*kiblnd_tunables.kib_concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
- return IBLND_MSG_QUEUE_SIZE_V1 * 2;
-
- if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
- return IBLND_MSG_QUEUE_SIZE_V1 / 2;
-
- return *kiblnd_tunables.kib_concurrent_sends;
-}
-
-#define IBLND_CONCURRENT_SENDS(v) ((v) == IBLND_MSG_VERSION_1 ? \
- kiblnd_concurrent_sends_v1() : \
- *kiblnd_tunables.kib_concurrent_sends)
/* 2 OOB shall suffice for 1 keepalive and 1 returning credits */
#define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1)
#define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0)
#define IBLND_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */
#define IBLND_MAX_RDMA_FRAGS LNET_MAX_IOV /* max # of fragments supported */
-#define IBLND_CFG_RDMA_FRAGS (*kiblnd_tunables.kib_map_on_demand != 0 ? \
- *kiblnd_tunables.kib_map_on_demand : \
- IBLND_MAX_RDMA_FRAGS) /* max # of fragments configured by user */
-#define IBLND_RDMA_FRAGS(v) ((v) == IBLND_MSG_VERSION_1 ? \
- IBLND_MAX_RDMA_FRAGS : IBLND_CFG_RDMA_FRAGS)
/************************/
/* derived constants... */
/* WRs and CQEs (per connection) */
#define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c)
#define IBLND_SEND_WRS(c) \
- ((c->ibc_max_frags + 1) * IBLND_CONCURRENT_SENDS(c->ibc_version))
+ ((c->ibc_max_frags + 1) * kiblnd_concurrent_sends(c->ibc_version, \
+ c->ibc_peer->ibp_ni))
#define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
struct kib_hca_dev;
int fps_cpt; /* CPT id */
int fps_pool_size;
int fps_flush_trigger;
+ int fps_cache;
/* is allocating new pool */
int fps_increasing;
/* time stamp for retry if failed to allocate */
extern void kiblnd_hdev_destroy(kib_hca_dev_t *hdev);
+int kiblnd_msg_queue_size(int version, struct lnet_ni *ni);
+
+/* max # of fragments configured by user */
+static inline int
+kiblnd_cfg_rdma_frags(struct lnet_ni *ni)
+{
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+ int mod;
+
+ /* NOTE(review): dereferences ni->ni_lnd_tunables unconditionally;
+ * assumes kiblnd_tunables_setup() allocated it at NI startup —
+ * confirm no caller can reach this earlier or with ni == NULL. */
+ tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ mod = tunables->lnd_map_on_demand;
+ /* 0 means map-on-demand is disabled: advertise the maximum */
+ return mod != 0 ? mod : IBLND_MAX_RDMA_FRAGS;
+}
+
+/* # RDMA fragments to advertise for a given protocol version */
+static inline int
+kiblnd_rdma_frags(int version, struct lnet_ni *ni)
+{
+ /* version-1 peers always use the maximum; newer peers honour the
+ * per-NI map_on_demand configuration */
+ return version == IBLND_MSG_VERSION_1 ?
+ IBLND_MAX_RDMA_FRAGS :
+ kiblnd_cfg_rdma_frags(ni);
+}
+
+/* send work queue sizing for a connection of the given protocol version */
+static inline int
+kiblnd_concurrent_sends(int version, struct lnet_ni *ni)
+{
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+ int concurrent_sends;
+
+ tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ concurrent_sends = tunables->lnd_concurrent_sends;
+
+ /* V1 peers have a fixed message queue size, so clamp the configured
+ * value to [IBLND_MSG_QUEUE_SIZE_V1 / 2, IBLND_MSG_QUEUE_SIZE_V1 * 2]
+ * — same policy as the removed kiblnd_concurrent_sends_v1(). */
+ if (version == IBLND_MSG_VERSION_1) {
+ if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
+ return IBLND_MSG_QUEUE_SIZE_V1 * 2;
+
+ if (concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
+ return IBLND_MSG_QUEUE_SIZE_V1 / 2;
+ }
+
+ return concurrent_sends;
+}
+
static inline void
kiblnd_hdev_addref_locked(kib_hca_dev_t *hdev)
{
static inline int
kiblnd_need_noop(kib_conn_t *conn)
{
- LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+ lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+
+ LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
+ tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
if (conn->ibc_outstanding_credits <
- IBLND_CREDITS_HIGHWATER(conn->ibc_version) &&
+ IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
!kiblnd_send_keepalive(conn))
return 0; /* No need to send NOOP */
#define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data)
#define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
-struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev,
- kib_rdma_desc_t *rd,
+struct ib_mr *kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
int negotiated_nfrags);
void kiblnd_map_rx_descs(kib_conn_t *conn);
void kiblnd_unmap_rx_descs(kib_conn_t *conn);
__u32 nob, __u64 iov, bool is_rx, kib_fmr_t *fmr);
void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status);
+int kiblnd_tunables_setup(struct lnet_ni *ni);
int kiblnd_tunables_init(void);
void kiblnd_tunables_fini(void);
static int
kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, int nfrags)
{
- kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev;
kib_net_t *net = ni->ni_data;
+ kib_hca_dev_t *hdev = net->ibn_dev->ibd_hdev;
struct ib_mr *mr = NULL;
__u32 nob;
int i;
nob += rd->rd_frags[i].rf_nob;
}
- mr = kiblnd_find_rd_dma_mr(hdev, rd,
+ mr = kiblnd_find_rd_dma_mr(ni, rd,
(tx->tx_conn != NULL) ?
tx->tx_conn->ibc_max_frags : -1);
if (mr != NULL) {
{
kib_msg_t *msg = tx->tx_msg;
kib_peer_t *peer = conn->ibc_peer;
+ struct lnet_ni *ni = peer->ibp_ni;
int ver = conn->ibc_version;
int rc;
int done;
LASSERT(conn->ibc_credits >= 0);
LASSERT(conn->ibc_credits <= conn->ibc_queue_depth);
- if (conn->ibc_nsends_posted == IBLND_CONCURRENT_SENDS(ver)) {
+ if (conn->ibc_nsends_posted ==
+ kiblnd_concurrent_sends(ver, ni)) {
/* tx completions outstanding... */
CDEBUG(D_NET, "%s: posted enough\n",
libcfs_nid2str(peer->ibp_nid));
spin_lock(&conn->ibc_lock);
- LASSERT (conn->ibc_nsends_posted <= IBLND_CONCURRENT_SENDS(ver));
+ LASSERT(conn->ibc_nsends_posted <=
+ kiblnd_concurrent_sends(ver, ni));
LASSERT (!IBLND_OOB_CAPABLE(ver) ||
conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver));
LASSERT (conn->ibc_reserved_credits >= 0);
}
if (reqmsg->ibm_u.connparams.ibcp_queue_depth >
- IBLND_MSG_QUEUE_SIZE(version)) {
+ kiblnd_msg_queue_size(version, ni)) {
CERROR("Can't accept conn from %s, queue depth too large: "
" %d (<=%d wanted)\n",
libcfs_nid2str(nid),
reqmsg->ibm_u.connparams.ibcp_queue_depth,
- IBLND_MSG_QUEUE_SIZE(version));
+ kiblnd_msg_queue_size(version, ni));
if (version == IBLND_MSG_VERSION)
rej.ibr_why = IBLND_REJECT_MSG_QUEUE_SIZE;
}
if (reqmsg->ibm_u.connparams.ibcp_max_frags >
- IBLND_RDMA_FRAGS(version)) {
+ kiblnd_rdma_frags(version, ni)) {
CWARN("Can't accept conn from %s (version %x): "
"max_frags %d too large (%d wanted)\n",
- libcfs_nid2str(nid), version,
- reqmsg->ibm_u.connparams.ibcp_max_frags,
- IBLND_RDMA_FRAGS(version));
+ libcfs_nid2str(nid), version,
+ reqmsg->ibm_u.connparams.ibcp_max_frags,
+ kiblnd_rdma_frags(version, ni));
if (version >= IBLND_MSG_VERSION)
rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
goto failed;
} else if (reqmsg->ibm_u.connparams.ibcp_max_frags <
- IBLND_RDMA_FRAGS(version) && net->ibn_fmr_ps == NULL) {
+ kiblnd_rdma_frags(version, ni) &&
+ net->ibn_fmr_ps == NULL) {
CWARN("Can't accept conn from %s (version %x): "
"max_frags %d incompatible without FMR pool "
"(%d wanted)\n",
libcfs_nid2str(nid), version,
reqmsg->ibm_u.connparams.ibcp_max_frags,
- IBLND_RDMA_FRAGS(version));
+ kiblnd_rdma_frags(version, ni));
- if (version >= IBLND_MSG_VERSION)
+ if (version == IBLND_MSG_VERSION)
rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
goto failed;
- if (ni != NULL)
- lnet_ni_decref(ni);
- rej.ibr_version = version;
- rej.ibr_cp.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE(version);
- rej.ibr_cp.ibcp_max_frags = IBLND_RDMA_FRAGS(version);
- kiblnd_reject(cmid, &rej);
+ /* Compute the reject hints while "ni" is still valid: the new
+ * helpers dereference ni->ni_lnd_tunables, so they must not be
+ * called after lnet_ni_decref() nor with a NULL ni (on the paths
+ * where no NI was found, the hints keep their initial values). */
+ if (ni != NULL) {
+ rej.ibr_cp.ibcp_queue_depth =
+ kiblnd_msg_queue_size(version, ni);
+ rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
+ lnet_ni_decref(ni);
+ }
+ rej.ibr_version = version;
+ kiblnd_reject(cmid, &rej);
- return -ECONNREFUSED;
+ return -ECONNREFUSED;
}
static void
reason = "Unknown";
break;
- case IBLND_REJECT_RDMA_FRAGS:
+ case IBLND_REJECT_RDMA_FRAGS: {
+ struct lnet_ioctl_config_lnd_tunables *tunables;
+
if (!cp) {
reason = "can't negotiate max frags";
goto out;
}
- if (*kiblnd_tunables.kib_map_on_demand == 0) {
+ tunables = peer->ibp_ni->ni_lnd_tunables;
+ if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) {
reason = "map_on_demand must be enabled";
goto out;
}
peer->ibp_max_frags = frag_num;
reason = "rdma fragments";
break;
-
+ }
case IBLND_REJECT_MSG_QUEUE_SIZE:
if (!cp) {
reason = "can't negotiate queue depth";
CFS_MODULE_PARM(dev_failover, "i", int, 0444,
"HCA failover for bonding (0 off, 1 on, other values reserved)");
-
static int require_privileged_port = 0;
CFS_MODULE_PARM(require_privileged_port, "i", int, 0644,
"require privileged port when accepting connection");
.kib_timeout = &timeout,
.kib_keepalive = &keepalive,
.kib_ntx = &ntx,
- .kib_credits = &credits,
- .kib_peertxcredits = &peer_credits,
- .kib_peercredits_hiw = &peer_credits_hiw,
- .kib_peerrtrcredits = &peer_buffer_credits,
- .kib_peertimeout = &peer_timeout,
.kib_default_ipif = &ipif_name,
.kib_retry_count = &retry_count,
.kib_rnr_retry_count = &rnr_retry_count,
- .kib_concurrent_sends = &concurrent_sends,
.kib_ib_mtu = &ib_mtu,
- .kib_map_on_demand = &map_on_demand,
- .kib_fmr_pool_size = &fmr_pool_size,
- .kib_fmr_flush_trigger = &fmr_flush_trigger,
- .kib_fmr_cache = &fmr_cache,
.kib_require_priv_port = &require_privileged_port,
.kib_use_priv_port = &use_privileged_port,
.kib_nscheds = &nscheds
};
+static struct lnet_ioctl_config_o2iblnd_tunables default_tunables;
+
#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
static char ipif_basename_space[32];
kiblnd_sysctl_fini (void)
{
}
-
#endif
+/* # messages/RDMAs in-flight */
int
-kiblnd_tunables_init (void)
+kiblnd_msg_queue_size(int version, lnet_ni_t *ni)
{
- if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
- CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
- *kiblnd_tunables.kib_ib_mtu);
- return -EINVAL;
- }
-
- if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT)
- *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT;
-
- if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX)
- *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX;
-
- if (*kiblnd_tunables.kib_peertxcredits > *kiblnd_tunables.kib_credits)
- *kiblnd_tunables.kib_peertxcredits = *kiblnd_tunables.kib_credits;
-
- if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2)
- *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2;
-
- if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits)
- *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1;
-
- if (*kiblnd_tunables.kib_map_on_demand < 0 ||
- *kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS)
- *kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */
-
- if (*kiblnd_tunables.kib_map_on_demand == 1)
- *kiblnd_tunables.kib_map_on_demand = 2; /* don't make sense to create map if only one fragment */
-
- if (*kiblnd_tunables.kib_concurrent_sends == 0) {
- if (*kiblnd_tunables.kib_map_on_demand > 0 &&
- *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
- *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2;
- else
- *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits);
- }
-
- if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2)
- *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2;
-
- if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2)
- *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2;
+ if (version == IBLND_MSG_VERSION_1)
+ return IBLND_MSG_QUEUE_SIZE_V1;
+ else if (ni)
+ return ni->ni_peertxcredits;
+ else
+ return peer_credits;
+}
- if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) {
- CWARN("Concurrent sends %d is lower than message queue size: %d, "
- "performance may drop slightly.\n",
- *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits);
- }
+int
+kiblnd_tunables_setup(lnet_ni_t *ni)
+{
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+
+ /*
+ * If no tunables were specified for this NI, allocate a private
+ * copy and seed it from the module-parameter defaults captured in
+ * kiblnd_tunables_init().
+ */
+ if (!ni->ni_lnd_tunables) {
+ LIBCFS_ALLOC(ni->ni_lnd_tunables,
+ sizeof(*ni->ni_lnd_tunables));
+ if (!ni->ni_lnd_tunables)
+ return -ENOMEM;
+
+ /* only the o2ib union member is defaulted; lt_cmn is left
+ * as allocated — presumably zero-filled by LIBCFS_ALLOC
+ * (NOTE(review): confirm) */
+ memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib,
+ &default_tunables, sizeof(*tunables));
+ }
+ tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+
+ /* Current API version */
+ tunables->lnd_version = 0;
+
+ if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
+ CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
+ *kiblnd_tunables.kib_ib_mtu);
+ return -EINVAL;
+ }
+
+ /* a zero value means "not set": fall back to the module parameter.
+ * NOTE(review): this also makes an explicit user-supplied 0
+ * indistinguishable from "unset" — confirm that is intended. */
+ if (!ni->ni_peertimeout)
+ ni->ni_peertimeout = peer_timeout;
+
+ if (!ni->ni_maxtxcredits)
+ ni->ni_maxtxcredits = credits;
+
+ if (!ni->ni_peertxcredits)
+ ni->ni_peertxcredits = peer_credits;
+
+ if (!ni->ni_peerrtrcredits)
+ ni->ni_peerrtrcredits = peer_buffer_credits;
+
+ /* clamp peer tx credits into [IBLND_CREDITS_DEFAULT, min(
+ * IBLND_CREDITS_MAX, credits)] */
+ if (ni->ni_peertxcredits < IBLND_CREDITS_DEFAULT)
+ ni->ni_peertxcredits = IBLND_CREDITS_DEFAULT;
+
+ if (ni->ni_peertxcredits > IBLND_CREDITS_MAX)
+ ni->ni_peertxcredits = IBLND_CREDITS_MAX;
+
+ if (ni->ni_peertxcredits > credits)
+ ni->ni_peertxcredits = credits;
+
+ if (!tunables->lnd_peercredits_hiw)
+ tunables->lnd_peercredits_hiw = peer_credits_hiw;
+
+ /* the credits high-water mark must lie strictly between half the
+ * peer tx credits and the full amount */
+ if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2)
+ tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2;
+
+ if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits)
+ tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1;
+
+ if (tunables->lnd_map_on_demand < 0 ||
+ tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
+ /* disable map-on-demand */
+ tunables->lnd_map_on_demand = 0;
+ }
+
+ if (tunables->lnd_map_on_demand == 1) {
+ /* don't make sense to create map if only one fragment */
+ tunables->lnd_map_on_demand = 2;
+ }
+
+ if (tunables->lnd_concurrent_sends == 0) {
+ if (tunables->lnd_map_on_demand > 0 &&
+ tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) {
+ tunables->lnd_concurrent_sends =
+ ni->ni_peertxcredits * 2;
+ } else {
+ tunables->lnd_concurrent_sends = ni->ni_peertxcredits;
+ }
+ }
+
+ /* keep concurrent sends within [peertxcredits / 2,
+ * peertxcredits * 2] */
+ if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2)
+ tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2;
+
+ if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2)
+ tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2;
+
+ if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) {
+ CWARN("Concurrent sends %d is lower than message "
+ "queue size: %d, performance may drop slightly.\n",
+ tunables->lnd_concurrent_sends, ni->ni_peertxcredits);
+ }
+
+ /* FMR settings default to the module parameters when unset */
+ if (!tunables->lnd_fmr_pool_size)
+ tunables->lnd_fmr_pool_size = fmr_pool_size;
+ if (!tunables->lnd_fmr_flush_trigger)
+ tunables->lnd_fmr_flush_trigger = fmr_flush_trigger;
+ if (!tunables->lnd_fmr_cache)
+ tunables->lnd_fmr_cache = fmr_cache;
+
+ return 0;
+}
- kiblnd_sysctl_init();
- return 0;
+int
+kiblnd_tunables_init(void)
+{
+ /* Snapshot the module parameters as the default per-NI tunables;
+ * kiblnd_tunables_setup() copies these for NIs configured without
+ * explicit LND tunables. */
+ default_tunables.lnd_version = 0;
+ /* was terminated with a comma (comma operator), which silently
+ * fused this statement with the next — use a semicolon */
+ default_tunables.lnd_peercredits_hiw = peer_credits_hiw;
+ default_tunables.lnd_map_on_demand = map_on_demand;
+ default_tunables.lnd_concurrent_sends = concurrent_sends;
+ default_tunables.lnd_fmr_pool_size = fmr_pool_size;
+ default_tunables.lnd_fmr_flush_trigger = fmr_flush_trigger;
+ default_tunables.lnd_fmr_cache = fmr_cache;
+
+ kiblnd_sysctl_init();
+ return 0;
}
+/* tear down the sysctl interface registered by kiblnd_tunables_init() */
 void
-kiblnd_tunables_fini (void)
+kiblnd_tunables_fini(void)
 {
- kiblnd_sysctl_fini();
+ kiblnd_sysctl_fini();
 }
}
static int
-lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
- __s32 peer_cr, __s32 peer_buf_cr, __s32 credits)
+lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
{
+ struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
int rc = -EINVAL;
__u32 lnd_type;
lnd_t *lnd;
ni->ni_lnd = lnd;
+ /* LND tunables, if present, ride in cfg_bulk after the config
+ * struct. Only accept them when the ioctl buffer is large enough
+ * to hold a complete structure: a bare "> sizeof(*conf)" would let
+ * a short buffer through and the memcpy below would read past the
+ * end of the user-supplied data. */
+ if (conf && conf->cfg_hdr.ioc_len >=
+ sizeof(*conf) + sizeof(*lnd_tunables))
+ lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
+
+ if (lnd_tunables != NULL) {
+ LIBCFS_ALLOC(ni->ni_lnd_tunables,
+ sizeof(*ni->ni_lnd_tunables));
+ if (ni->ni_lnd_tunables == NULL) {
+ LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
+ rc = -ENOMEM;
+ goto failed0;
+ }
+ /* keep a kernel-owned copy; freed with the NI */
+ memcpy(ni->ni_lnd_tunables, lnd_tunables,
+ sizeof(*ni->ni_lnd_tunables));
+ }
+
rc = (lnd->lnd_startup)(ni);
LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
/* If given some LND tunable parameters, parse those now to
* override the values in the NI structure. */
- if (peer_buf_cr >= 0)
- ni->ni_peerrtrcredits = peer_buf_cr;
- if (peer_timeout >= 0)
- ni->ni_peertimeout = peer_timeout;
+ if (conf && conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0) {
+ ni->ni_peerrtrcredits =
+ conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
+ }
+ if (conf && conf->cfg_config_u.cfg_net.net_peer_timeout >= 0) {
+ ni->ni_peertimeout =
+ conf->cfg_config_u.cfg_net.net_peer_timeout;
+ }
+
/*
* TODO
* Note: For now, don't allow the user to change
* peertxcredits as this number is used in the
* IB LND to control queue depth.
- * if (peer_cr != -1)
- * ni->ni_peertxcredits = peer_cr;
+ *
+ * if (conf && conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1)
+ * ni->ni_peertxcredits =
+ * conf->cfg_config_u.cfg_net.net_peer_tx_credits;
*/
- if (credits >= 0)
- ni->ni_maxtxcredits = credits;
+ if (conf && conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0) {
+ ni->ni_maxtxcredits =
+ conf->cfg_config_u.cfg_net.net_max_tx_credits;
+ }
LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
while (!list_empty(nilist)) {
ni = list_entry(nilist->next, lnet_ni_t, ni_list);
list_del(&ni->ni_list);
- rc = lnet_startup_lndni(ni, -1, -1, -1, -1);
+ rc = lnet_startup_lndni(ni, NULL);
if (rc < 0)
goto failed;
* \param[out] net_config Network configuration
*/
static void
-lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid,
- int *peer_timeout, int *peer_tx_credits,
- int *peer_rtr_credits, int *max_tx_credits,
- struct lnet_ioctl_net_config *net_config)
+lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
{
+ struct lnet_ioctl_net_config *net_config;
+ struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
+ size_t min_size, tunable_size = 0;
 int i;
- if (ni == NULL)
+ if (!ni || !config)
 return;
- if (net_config == NULL)
+ net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
+ if (!net_config)
 return;
 CLASSERT(ARRAY_SIZE(ni->ni_interfaces) ==
 }
 }
- *nid = ni->ni_nid;
- *peer_timeout = ni->ni_peertimeout;
- *peer_tx_credits = ni->ni_peertxcredits;
- *peer_rtr_credits = ni->ni_peerrtrcredits;
- *max_tx_credits = ni->ni_maxtxcredits;
+ config->cfg_nid = ni->ni_nid;
+ config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout;
+ config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits;
+ config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits;
+ config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits;
 net_config->ni_status = ni->ni_status->ns_status;
 i++)
 net_config->ni_cpts[i] = ni->ni_cpts[i];
- *cpt_count = ni->ni_ncpts;
+ config->cfg_ncpts = ni->ni_ncpts;
+
+ /*
+ * See if user land tools sent in a newer and larger version
+ * of struct lnet_tunables than what the kernel uses.
+ */
+ min_size = sizeof(*config) + sizeof(*net_config);
+
+ if (config->cfg_hdr.ioc_len > min_size)
+ tunable_size = config->cfg_hdr.ioc_len - min_size;
+
+ /* Don't copy too much data to user space */
+ min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables));
+ lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
+
+ if (ni->ni_lnd_tunables && lnd_cfg && min_size) {
+ memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size);
+ config->cfg_config_u.cfg_net.net_interface_count = 1;
+
+ /* Tell user land that kernel side has less data */
+ if (tunable_size > sizeof(*ni->ni_lnd_tunables)) {
+ /* sizeof(*...): the struct size, not the pointer
+ * size — using sizeof(ni->ni_lnd_tunables) here
+ * shrank ioc_len by the wrong amount */
+ min_size = tunable_size - sizeof(*ni->ni_lnd_tunables);
+ config->cfg_hdr.ioc_len -= min_size;
+ }
+ }
}
-int
-lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout,
- int *peer_tx_credits, int *peer_rtr_credits,
- int *max_tx_credits,
- struct lnet_ioctl_net_config *net_config)
+static int
+lnet_get_net_config(struct lnet_ioctl_config_data *config)
{
- struct lnet_ni *ni;
- struct list_head *tmp;
- int cpt;
- int rc = -ENOENT;
+ struct lnet_ni *ni;
+ struct list_head *tmp;
+ int idx = config->cfg_count;
+ int rc = -ENOENT;
+ int cpt;
+
+ if (unlikely(!config->cfg_bulk))
+ return -EINVAL;
cpt = lnet_net_lock_current();
if (idx-- == 0) {
rc = 0;
lnet_ni_lock(ni);
- lnet_fill_ni_info(ni, cpt_count, nid, peer_timeout,
- peer_tx_credits, peer_rtr_credits,
- max_tx_credits, net_config);
+ lnet_fill_ni_info(ni, config);
lnet_ni_unlock(ni);
break;
}
}
int
-lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
- __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr,
- __s32 credits)
+lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
{
+ char *nets = conf->cfg_config_u.cfg_net.net_intf;
lnet_ping_info_t *pinfo;
lnet_handle_md_t md_handle;
struct lnet_ni *ni;
list_del_init(&ni->ni_list);
- rc = lnet_startup_lndni(ni, peer_timeout, peer_cr,
- peer_buf_cr, credits);
+ rc = lnet_startup_lndni(ni, conf);
if (rc != 0)
goto failed1;
rtr_priority);
case IOC_LIBCFS_GET_NET: {
- struct lnet_ioctl_net_config *net_config;
- size_t total = sizeof(*config) + sizeof(*net_config);
-
+ size_t total = sizeof(*config) +
+ sizeof(struct lnet_ioctl_net_config);
config = arg;
if (config->cfg_hdr.ioc_len < total)
return -EINVAL;
- net_config = (struct lnet_ioctl_net_config *)
- config->cfg_bulk;
- if (net_config == NULL)
- return -EINVAL;
-
- return lnet_get_net_config(config->cfg_count,
- &config->cfg_ncpts,
- &config->cfg_nid,
- &config->cfg_config_u.
- cfg_net.net_peer_timeout,
- &config->cfg_config_u.cfg_net.
- net_peer_tx_credits,
- &config->cfg_config_u.cfg_net.
- net_peer_rtr_credits,
- &config->cfg_config_u.cfg_net.
- net_max_tx_credits,
- net_config);
+ return lnet_get_net_config(config);
}
case IOC_LIBCFS_GET_LNET_STATS:
if (ni->ni_cpts != NULL)
cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
+ if (ni->ni_lnd_tunables != NULL)
+ LIBCFS_FREE(ni->ni_lnd_tunables, sizeof(*ni->ni_lnd_tunables));
+
for (i = 0; i < LNET_MAX_INTERFACES &&
ni->ni_interfaces[i] != NULL; i++) {
LIBCFS_FREE(ni->ni_interfaces[i],
LNET_MUTEX_LOCK(&lnet_config_mutex);
if (the_lnet.ln_niinit_self)
- rc = lnet_dyn_add_ni(LNET_PID_LUSTRE,
- conf->cfg_config_u.cfg_net.net_intf,
- conf->cfg_config_u.cfg_net.
- net_peer_timeout,
- conf->cfg_config_u.cfg_net.
- net_peer_tx_credits,
- conf->cfg_config_u.cfg_net.
- net_peer_rtr_credits,
- conf->cfg_config_u.cfg_net.
- net_max_tx_credits);
+ rc = lnet_dyn_add_ni(LNET_PID_LUSTRE, conf);
else
rc = -EINVAL;
LNET_MUTEX_UNLOCK(&lnet_config_mutex);
CYAML := $(top_builddir)/lnet/utils/cyaml/cyaml.c \
$(top_builddir)/lnet/utils/cyaml/cyaml.h
-liblnetconfig_la_SOURCES = liblnetconfig.c liblnetconfig.h $(CYAML)
+liblnetconfig_la_SOURCES = liblnetconfig.c liblnetconfig.h \
+ liblnetconfig_lnd.c liblnd.h $(CYAML)
liblnetconfig_la_CPPFLAGS = -D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64 \
-DLUSTRE_UTILS=1 -I$(top_builddir)/lnet/utils/cyaml
-liblnetconfig_la_LDFLAGS = -L$(top_builddir)/libcfs/libcfs -version-info 1:0:0
+liblnetconfig_la_LDFLAGS = -L$(top_builddir)/libcfs/libcfs -version-info 1:1:0
EXTRA_DIST =
--- /dev/null
+/*
+ * LGPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * LGPL HEADER END
+ *
+ * Copyright (c) 2015, James Simmons <jsimmons@infradead.org>
+ */
+
+#ifndef LIB_LND_CONFIG_API_H
+#define LIB_LND_CONFIG_API_H
+
+#include <lnet/lib-dlc.h>
+#include "cyaml.h"
+
+int
+lustre_interface_show_net(struct cYAML *interfaces, unsigned int index,
+ bool detail, struct lnet_ioctl_config_data *data,
+ struct lnet_ioctl_net_config *net_config);
+
+void
+lustre_interface_parse(struct cYAML *lndparams, const char *dev_name,
+ struct lnet_ioctl_config_lnd_tunables *lnd_cfg);
+
+#endif /* LIB_LND_CONFIG_API_H */
#include <libcfs/util/ioctl.h>
#include <lnet/lnetctl.h>
#include <lnet/socklnd.h>
-#include <lnet/lnet.h>
+#include "liblnd.h"
#include "liblnetconfig.h"
#include "cyaml.h"
int lustre_lnet_config_net(char *net, char *intf, char *ip2net,
int peer_to, int peer_cr, int peer_buf_cr,
int credits, char *smp, int seq_no,
+ struct lnet_ioctl_config_lnd_tunables *lnd_tunables,
struct cYAML **err_rc)
{
- struct lnet_ioctl_config_data data;
+ struct lnet_ioctl_config_lnd_tunables *lnd = NULL;
+ struct lnet_ioctl_config_data *data;
+ size_t ioctl_size = sizeof(*data);
char buf[LNET_MAX_STR_LEN];
int rc = LUSTRE_CFG_RC_NO_ERR;
char err_str[LNET_MAX_STR_LEN];
snprintf(err_str, sizeof(err_str), "\"success\"");
+ /* No need to register lo */
+ if (net != NULL && !strcmp(net, "lo"))
+ return 0;
+
if (ip2net == NULL && (intf == NULL || net == NULL)) {
snprintf(err_str,
sizeof(err_str),
goto out;
}
+ if (lnd_tunables != NULL)
+ ioctl_size += sizeof(*lnd_tunables);
+
+ data = calloc(1, ioctl_size);
+ if (data == NULL)
+ goto out;
+
if (ip2net == NULL)
snprintf(buf, sizeof(buf) - 1, "%s(%s)%s",
net, intf,
(smp) ? smp : "");
- LIBCFS_IOC_INIT_V2(data, cfg_hdr);
- strncpy(data.cfg_config_u.cfg_net.net_intf,
+ LIBCFS_IOC_INIT_V2(*data, cfg_hdr);
+ strncpy(data->cfg_config_u.cfg_net.net_intf,
(ip2net != NULL) ? ip2net : buf, sizeof(buf));
- data.cfg_config_u.cfg_net.net_peer_timeout = peer_to;
- data.cfg_config_u.cfg_net.net_peer_tx_credits = peer_cr;
- data.cfg_config_u.cfg_net.net_peer_rtr_credits = peer_buf_cr;
- data.cfg_config_u.cfg_net.net_max_tx_credits = credits;
+ data->cfg_config_u.cfg_net.net_peer_timeout = peer_to;
+ data->cfg_config_u.cfg_net.net_peer_tx_credits = peer_cr;
+ data->cfg_config_u.cfg_net.net_peer_rtr_credits = peer_buf_cr;
+ data->cfg_config_u.cfg_net.net_max_tx_credits = credits;
+ /* Add in tunable settings if available */
+ if (lnd_tunables != NULL) {
+ lnd = (struct lnet_ioctl_config_lnd_tunables *)data->cfg_bulk;
+
+ data->cfg_hdr.ioc_len = ioctl_size;
+ memcpy(lnd, lnd_tunables, sizeof(*lnd_tunables));
+ }
- rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_NET, &data);
+ rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_NET, data);
if (rc < 0) {
rc = -errno;
snprintf(err_str,
sizeof(err_str),
"\"cannot add network: %s\"", strerror(errno));
}
+ free(data);
out:
cYAML_build_error(rc, seq_no, ADD_CMD, "net", err_str, err_rc);
struct cYAML **show_rc, struct cYAML **err_rc)
{
char *buf;
+ struct lnet_ioctl_config_lnd_tunables *lnd_cfg;
struct lnet_ioctl_config_data *data;
struct lnet_ioctl_net_config *net_config;
__u32 net = LNET_NIDNET(LNET_NID_ANY);
int rc = LUSTRE_CFG_RC_OUT_OF_MEM, i, j;
int l_errno = 0;
- struct cYAML *root = NULL, *tunables = NULL,
- *net_node = NULL, *interfaces = NULL,
- *item = NULL, *first_seq = NULL;
+ struct cYAML *root = NULL, *tunables = NULL, *net_node = NULL,
+ *interfaces = NULL, *item = NULL, *first_seq = NULL;
int str_buf_len = LNET_MAX_SHOW_NUM_CPT * 2;
char str_buf[str_buf_len];
char *pos;
char err_str[LNET_MAX_STR_LEN];
bool exist = false;
+ size_t buf_len;
snprintf(err_str, sizeof(err_str), "\"out of memory\"");
- buf = calloc(1, sizeof(*data) + sizeof(*net_config));
+ buf_len = sizeof(*data) + sizeof(*net_config) + sizeof(*lnd_cfg);
+ buf = calloc(1, buf_len);
if (buf == NULL)
goto out;
for (i = 0;; i++) {
pos = str_buf;
- memset(buf, 0, sizeof(*data) + sizeof(*net_config));
+ memset(buf, 0, buf_len);
LIBCFS_IOC_INIT_V2(*data, cfg_hdr);
/*
* set the ioc_len to the proper value since INIT assumes
* size of data
*/
- data->cfg_hdr.ioc_len = sizeof(struct lnet_ioctl_config_data) +
- sizeof(struct lnet_ioctl_net_config);
+ data->cfg_hdr.ioc_len = buf_len;
data->cfg_count = i;
rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_NET, data);
if (first_seq == NULL)
first_seq = item;
- if (cYAML_create_string(item,
- "net",
+ if (cYAML_create_string(item, "net",
libcfs_net2str(
LNET_NIDNET(data->cfg_nid)))
== NULL)
libcfs_nid2str(data->cfg_nid)) == NULL)
goto out;
- if (cYAML_create_string(item,
- "status",
+ if (cYAML_create_string(item, "status",
(net_config->ni_status ==
LNET_NI_STATUS_UP) ?
"up" : "down") == NULL)
goto out;
for (j = 0; j < LNET_MAX_INTERFACES; j++) {
- if (strlen(net_config->ni_interfaces[j]) > 0) {
- snprintf(str_buf,
- sizeof(str_buf), "%d", j);
- if (cYAML_create_string(interfaces,
- str_buf,
- net_config->ni_interfaces[j]) ==
- NULL)
- goto out;
- }
+ if (lustre_interface_show_net(interfaces, j,
+ detail, data,
+ net_config) < 0)
+ goto out;
}
}
if (cYAML_create_number(tunables, "peer_timeout",
data->cfg_config_u.cfg_net.
- net_peer_timeout) == NULL)
+ net_peer_timeout) == NULL)
goto out;
if (cYAML_create_number(tunables, "peer_credits",
data->cfg_config_u.cfg_net.
- net_peer_tx_credits) == NULL)
+ net_peer_tx_credits) == NULL)
goto out;
if (cYAML_create_number(tunables,
"peer_buffer_credits",
data->cfg_config_u.cfg_net.
- net_peer_rtr_credits) == NULL)
+ net_peer_rtr_credits) == NULL)
goto out;
if (cYAML_create_number(tunables, "credits",
data->cfg_config_u.cfg_net.
- net_max_tx_credits) == NULL)
+ net_max_tx_credits) == NULL)
goto out;
/* out put the CPTs in the format: "[x,x,x,...]" */
struct cYAML *net, *intf, *tunables, *seq_no,
*peer_to = NULL, *peer_buf_cr = NULL, *peer_cr = NULL,
*credits = NULL, *ip2net = NULL, *smp = NULL, *child;
+ struct lnet_ioctl_config_lnd_tunables *lnd_tunables_p = NULL;
+ struct lnet_ioctl_config_lnd_tunables lnd_tunables;
char devs[LNET_MAX_STR_LEN];
char *loc = devs;
int size = LNET_MAX_STR_LEN;
/* grab all the interfaces */
child = intf->cy_child;
while (child != NULL && size > 0) {
+ struct cYAML *lnd_params;
+
+ if (child->cy_valuestring == NULL)
+ goto ignore_child;
+
if (loc > devs)
num = snprintf(loc, size, ",%s",
child->cy_valuestring);
size -= num;
loc += num;
intf_found = true;
+
+ lnd_params = cYAML_get_object_item(intf,
+ "lnd tunables");
+ if (lnd_params != NULL) {
+ const char *dev_name = child->cy_valuestring;
+ lnd_tunables_p = &lnd_tunables;
+
+ lustre_interface_parse(lnd_params, dev_name,
+ lnd_tunables_p);
+ }
+ignore_child:
child = child->cy_next;
}
}
(credits) ? credits->cy_valueint : -1,
(smp) ? smp->cy_valuestring : NULL,
(seq_no) ? seq_no->cy_valueint : -1,
+ lnd_tunables_p,
err_rc);
}
#ifndef LIB_LNET_CONFIG_API_H
#define LIB_LNET_CONFIG_API_H
+#include <lnet/lnet.h>
+
#define LUSTRE_CFG_RC_NO_ERR 0
#define LUSTRE_CFG_RC_BAD_PARAM -1
#define LUSTRE_CFG_RC_MISSING_PARAM -2
* credits - network interface credits
* smp - cpu affinity
* seq_no - sequence number of the request
+ * lnd_tunables - LND device-specific tunable parameters
* err_rc - [OUT] struct cYAML tree describing the error. Freed by caller
*/
int lustre_lnet_config_net(char *net, char *intf, char *ip2net,
int peer_to, int peer_cr, int peer_buf_cr,
int credits, char *smp, int seq_no,
+ struct lnet_ioctl_config_lnd_tunables *lnd_tunables,
struct cYAML **err_rc);
/*
--- /dev/null
+/*
+ * LGPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * LGPL HEADER END
+ *
+ * Copyright (c) 2015, James Simmons
+ *
+ * Author:
+ * James Simmons <jsimmons@infradead.org>
+ */
+#include <stdio.h>
+#include <string.h>
+#include <libcfs/util/ioctl.h>
+#include "liblnetconfig.h"
+#include "cyaml.h"
+
+/*
+ * Dump the o2iblnd tunables into the "lnd tunables" YAML node.
+ * Returns 0 on success, -1 if any cYAML node could not be created.
+ */
+static int
+lustre_ko2iblnd_show_net(struct cYAML *lndparams,
+			 struct lnet_ioctl_config_lnd_tunables *tunables)
+{
+	struct lnet_ioctl_config_o2iblnd_tunables *o2ib;
+	unsigned int i;
+
+	o2ib = &tunables->lt_tun_u.lt_o2ib;
+
+	/* emit each tunable in a fixed order; table keeps key names and
+	 * struct fields visibly paired */
+	{
+		struct {
+			char *key;
+			__u32 val;
+		} settings[] = {
+			{ "peercredits_hiw",	o2ib->lnd_peercredits_hiw },
+			{ "map_on_demand",	o2ib->lnd_map_on_demand },
+			{ "concurrent_sends",	o2ib->lnd_concurrent_sends },
+			{ "fmr_pool_size",	o2ib->lnd_fmr_pool_size },
+			{ "fmr_flush_trigger",	o2ib->lnd_fmr_flush_trigger },
+			{ "fmr_cache",		o2ib->lnd_fmr_cache },
+		};
+
+		for (i = 0; i < sizeof(settings) / sizeof(settings[0]); i++) {
+			if (cYAML_create_number(lndparams, settings[i].key,
+						settings[i].val) == NULL)
+				return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Emit interface 'index' of net_config under the 'interfaces' YAML node.
+ * When 'detail' is requested and the net is an o2ib net with interfaces
+ * present, also dump the LND tunables found in net_config->cfg_bulk.
+ * Returns 0 on success (or empty slot), -1 on cYAML allocation failure.
+ */
+int
+lustre_interface_show_net(struct cYAML *interfaces, unsigned int index,
+			  bool detail, struct lnet_ioctl_config_data *data,
+			  struct lnet_ioctl_net_config *net_config)
+{
+	/* room for up to 3 digits + NUL; the previous 2-byte buffer
+	 * truncated two-digit indexes (10..15) to a single character */
+	char ni_index[4];
+
+	if (strlen(net_config->ni_interfaces[index]) == 0)
+		return 0;
+
+	/* %u matches the unsigned 'index' argument */
+	snprintf(ni_index, sizeof(ni_index), "%u", index);
+	if (cYAML_create_string(interfaces, ni_index,
+				net_config->ni_interfaces[index]) == NULL)
+		return -1;
+
+	if (detail) {
+		__u32 net = LNET_NETTYP(LNET_NIDNET(data->cfg_nid));
+		struct lnet_ioctl_config_lnd_tunables *lnd_cfg;
+		struct cYAML *lndparams;
+
+		/* only the o2ib LND carries tunables in cfg_bulk */
+		if (data->cfg_config_u.cfg_net.net_interface_count == 0 ||
+		    net != O2IBLND)
+			return 0;
+
+		lndparams = cYAML_create_object(interfaces, "lnd tunables");
+		if (lndparams == NULL)
+			return -1;
+
+		lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
+		if (lustre_ko2iblnd_show_net(lndparams, lnd_cfg) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * Fill the o2iblnd tunables in lnd_cfg from the "lnd tunables" YAML
+ * node.  Any key absent from the YAML defaults to 0 (i.e. use the
+ * kernel module default).
+ */
+static void
+lustre_ko2iblnd_parse_net(struct cYAML *lndparams,
+			  struct lnet_ioctl_config_lnd_tunables *lnd_cfg)
+{
+	struct lnet_ioctl_config_o2iblnd_tunables *o2ib =
+		&lnd_cfg->lt_tun_u.lt_o2ib;
+	struct cYAML *item;
+
+	/* peercredits_hiw is emitted by lustre_ko2iblnd_show_net() but was
+	 * not parsed here, so a shown configuration did not round-trip;
+	 * accept it too */
+	item = cYAML_get_object_item(lndparams, "peercredits_hiw");
+	o2ib->lnd_peercredits_hiw = (item) ? item->cy_valueint : 0;
+
+	item = cYAML_get_object_item(lndparams, "map_on_demand");
+	o2ib->lnd_map_on_demand = (item) ? item->cy_valueint : 0;
+
+	item = cYAML_get_object_item(lndparams, "concurrent_sends");
+	o2ib->lnd_concurrent_sends = (item) ? item->cy_valueint : 0;
+
+	item = cYAML_get_object_item(lndparams, "fmr_pool_size");
+	o2ib->lnd_fmr_pool_size = (item) ? item->cy_valueint : 0;
+
+	item = cYAML_get_object_item(lndparams, "fmr_flush_trigger");
+	o2ib->lnd_fmr_flush_trigger = (item) ? item->cy_valueint : 0;
+
+	item = cYAML_get_object_item(lndparams, "fmr_cache");
+	o2ib->lnd_fmr_cache = (item) ? item->cy_valueint : 0;
+}
+
+/*
+ * Parse the per-device "lnd tunables" YAML node into lnd_cfg.
+ *
+ * lnd_cfg may point at an uninitialized stack structure in the caller,
+ * so zero it first: otherwise fields the LND parser does not set
+ * (lt_cmn, lnd_version, lnd_peercredits_hiw, pad) would carry stack
+ * garbage into the ioctl payload sent to the kernel.
+ */
+void
+lustre_interface_parse(struct cYAML *lndparams, const char *dev_name,
+		       struct lnet_ioctl_config_lnd_tunables *lnd_cfg)
+{
+	memset(lnd_cfg, 0, sizeof(*lnd_cfg));
+
+	/* only o2iblnd ("ib*" devices) has YAML-configurable tunables */
+	if (dev_name != NULL && strstr(dev_name, "ib"))
+		lustre_ko2iblnd_parse_net(lndparams, lnd_cfg);
+}
}
rc = lustre_lnet_config_net(network, intf, ip2net, pto, pc, pbc,
- cre, cpt, -1, &err_rc);
+ cre, cpt, -1, NULL, &err_rc);
if (rc != LUSTRE_CFG_RC_NO_ERR)
cYAML_print_tree2file(stderr, err_rc);