From 3c62efb37ec2757326eb0c24a78438b32d42bf2e Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Tue, 12 Jan 2016 17:09:31 -0800 Subject: [PATCH] LU-7734 lnet: configure peers from DLC This patch adds the ability to configure peers from the DLC interface. When a peer is added a primary NID should be provided. If none is provided then the first NID in the list of NIDs will be used as the primary NID. Basic error checking is done at the DLC level to ensure properly formatted NIDs. However, if a NID is a duplicate, this will be detected when adding it in the kernel. Operation is halted, which means some peer NIDs might have already been added, but not the entire set. It's the role of the caller to backtrack and remove that peer that failed to add. When deleting a peer a primary NID or a normal NID can be provided. If a standard NID is provided, then the peer is found, and the primary NID is compared to the peer ni. If they are the same the entire peer is deleted. Otherwise, only the identified peer ni is deleted. If a set of NIDs are provided each one will be removed from the peer identified by the peer NID in turn. The existing show peer credits API can be used to show peer information. 
Signed-off-by: Amir Shehata Change-Id: Iaf588a062b44d74305aa9aa7d31c7341c6c384b9 Reviewed-on: http://review.whamcloud.com/18476 Tested-by: Jenkins Reviewed-by: Doug Oucharek Tested-by: Maloo Reviewed-by: Olaf Weber --- libcfs/include/libcfs/libcfs_ioctl.h | 45 ++-- lnet/include/lnet/lib-dlc.h | 32 ++- lnet/include/lnet/lib-lnet.h | 20 +- lnet/include/lnet/lib-types.h | 4 + lnet/lnet/api-ni.c | 39 +++- lnet/lnet/lib-move.c | 4 +- lnet/lnet/peer.c | 377 +++++++++++++++++++++++++++--- lnet/lnet/router.c | 2 +- lnet/utils/lnetconfig/liblnetconfig.c | 416 +++++++++++++++++++++++++++++----- lnet/utils/lnetconfig/liblnetconfig.h | 57 ++++- lnet/utils/lnetctl.c | 220 +++++++++++++++--- 11 files changed, 1057 insertions(+), 159 deletions(-) diff --git a/libcfs/include/libcfs/libcfs_ioctl.h b/libcfs/include/libcfs/libcfs_ioctl.h index 40ff9f9..11292b2 100644 --- a/libcfs/include/libcfs/libcfs_ioctl.h +++ b/libcfs/include/libcfs/libcfs_ioctl.h @@ -128,18 +128,37 @@ struct libcfs_debug_ioctl_data * tools which might be accessing the IOCTL numbers, a new group of IOCTL * number have been allocated. 
*/ -#define IOCTL_CONFIG_SIZE struct lnet_ioctl_config_data -#define IOC_LIBCFS_ADD_ROUTE _IOWR(IOC_LIBCFS_TYPE, 81, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_DEL_ROUTE _IOWR(IOC_LIBCFS_TYPE, 82, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_GET_ROUTE _IOWR(IOC_LIBCFS_TYPE, 83, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_ADD_NET _IOWR(IOC_LIBCFS_TYPE, 84, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_DEL_NET _IOWR(IOC_LIBCFS_TYPE, 85, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_GET_NET _IOWR(IOC_LIBCFS_TYPE, 86, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_CONFIG_RTR _IOWR(IOC_LIBCFS_TYPE, 87, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_ADD_BUF _IOWR(IOC_LIBCFS_TYPE, 88, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_GET_BUF _IOWR(IOC_LIBCFS_TYPE, 89, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_GET_PEER_INFO _IOWR(IOC_LIBCFS_TYPE, 90, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_GET_LNET_STATS _IOWR(IOC_LIBCFS_TYPE, 91, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_MAX_NR 91 +#define IOCTL_CONFIG_SIZE struct lnet_ioctl_config_data +#define IOC_LIBCFS_ADD_ROUTE _IOWR(IOC_LIBCFS_TYPE, 81, \ + IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_DEL_ROUTE _IOWR(IOC_LIBCFS_TYPE, 82, \ + IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_GET_ROUTE _IOWR(IOC_LIBCFS_TYPE, 83, \ + IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_ADD_NET _IOWR(IOC_LIBCFS_TYPE, 84, \ + IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_DEL_NET _IOWR(IOC_LIBCFS_TYPE, 85, \ + IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_GET_NET _IOWR(IOC_LIBCFS_TYPE, 86, \ + IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_CONFIG_RTR _IOWR(IOC_LIBCFS_TYPE, 87, \ + IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_ADD_BUF _IOWR(IOC_LIBCFS_TYPE, 88, \ + IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_GET_BUF _IOWR(IOC_LIBCFS_TYPE, 89, \ + IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_GET_PEER_INFO _IOWR(IOC_LIBCFS_TYPE, 90, \ + IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_GET_LNET_STATS _IOWR(IOC_LIBCFS_TYPE, 91, \ + IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_ADD_PEER_NI _IOWR(IOC_LIBCFS_TYPE, 92, \ + IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_DEL_PEER_NI _IOWR(IOC_LIBCFS_TYPE, 
93, \ + IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_GET_PEER_NI _IOWR(IOC_LIBCFS_TYPE, 94, \ + IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_MAX_NR 94 + +extern int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data); #endif /* __LIBCFS_IOCTL_H__ */ diff --git a/lnet/include/lnet/lib-dlc.h b/lnet/include/lnet/lib-dlc.h index 69247f3..1213eb7 100644 --- a/lnet/include/lnet/lib-dlc.h +++ b/lnet/include/lnet/lib-dlc.h @@ -125,26 +125,36 @@ struct lnet_ioctl_config_data { char cfg_bulk[0]; }; +struct lnet_peer_ni_credit_info { + char cr_aliveness[LNET_MAX_STR_LEN]; + __u32 cr_refcount; + __s32 cr_ni_peer_tx_credits; + __s32 cr_peer_tx_credits; + __s32 cr_peer_rtr_credits; + __s32 cr_peer_min_rtr_credits; + __u32 cr_peer_tx_qnob; + __u32 cr_ncpt; +}; + struct lnet_ioctl_peer { struct libcfs_ioctl_hdr pr_hdr; __u32 pr_count; __u32 pr_pad; - __u64 pr_nid; + lnet_nid_t pr_nid; union { - struct { - char cr_aliveness[LNET_MAX_STR_LEN]; - __u32 cr_refcount; - __u32 cr_ni_peer_tx_credits; - __u32 cr_peer_tx_credits; - __u32 cr_peer_rtr_credits; - __u32 cr_peer_min_rtr_credits; - __u32 cr_peer_tx_qnob; - __u32 cr_ncpt; - } pr_peer_credits; + struct lnet_peer_ni_credit_info pr_peer_credits; } pr_lnd_u; }; +struct lnet_ioctl_peer_cfg { + struct libcfs_ioctl_hdr prcfg_hdr; + lnet_nid_t prcfg_key_nid; + lnet_nid_t prcfg_cfg_nid; + __u32 prcfg_idx; + char prcfg_bulk[0]; +}; + struct lnet_ioctl_lnet_stats { struct libcfs_ioctl_hdr st_hdr; struct lnet_counters st_cntrs; diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index eb77d0c..014ce10 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -799,21 +799,25 @@ struct lnet_peer_ni *lnet_get_next_peer_ni_locked(struct lnet_peer *peer, int lnet_find_or_create_peer_locked(lnet_nid_t dst_nid, int cpt, struct lnet_peer **peer); int lnet_nid2peerni_locked(struct lnet_peer_ni **lpp, lnet_nid_t nid, int cpt); -struct lnet_peer_ni *lnet_find_peer_ni_locked(lnet_nid_t nid, int cpt); +struct 
lnet_peer_ni *lnet_find_peer_ni_locked(lnet_nid_t nid); void lnet_peer_tables_cleanup(lnet_ni_t *ni); -void lnet_peer_tables_destroy(void); +void lnet_peer_uninit(void); int lnet_peer_tables_create(void); void lnet_debug_peer(lnet_nid_t nid); struct lnet_peer_net *lnet_peer_get_net_locked(struct lnet_peer *peer, __u32 net_id); bool lnet_peer_is_ni_pref_locked(struct lnet_peer_ni *lpni, struct lnet_ni *ni); -int lnet_get_peer_info(__u32 peer_index, __u64 *nid, - char alivness[LNET_MAX_STR_LEN], - __u32 *cpt_iter, __u32 *refcount, - __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits, - __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credtis, - __u32 *peer_tx_qnob); +int lnet_add_peer_ni_to_peer(lnet_nid_t key_nid, lnet_nid_t nid); +int lnet_del_peer_ni_from_peer(lnet_nid_t key_nid, lnet_nid_t nid); +int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid, + struct lnet_peer_ni_credit_info *peer_ni_info); +int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid, + char alivness[LNET_MAX_STR_LEN], + __u32 *cpt_iter, __u32 *refcount, + __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits, + __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credtis, + __u32 *peer_tx_qnob); static inline bool lnet_is_peer_ni_healthy_locked(struct lnet_peer_ni *lpni) diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index c073953..1799889 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -424,6 +424,8 @@ typedef struct { struct lnet_peer_ni { /* cahian on peer_net */ struct list_head lpni_on_peer_net_list; + /* chain on remote peer list */ + struct list_head lpni_on_remote_peer_ni_list; /* chain on peer hash */ struct list_head lpni_hashlist; /* messages blocking for tx credits */ @@ -744,6 +746,8 @@ typedef struct struct lnet_peer_table **ln_peer_tables; /* list of configured or discovered peers */ struct list_head ln_peers; + /* list of peer nis not on a local network */ + struct list_head ln_remote_peer_ni_list; /* failure 
simulation */ struct list_head ln_test_peers; struct list_head ln_drop_rules; diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 8f5dc24..f4587da 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -599,6 +599,7 @@ lnet_prepare(lnet_pid_t requested_pid) INIT_LIST_HEAD(&the_lnet.ln_test_peers); INIT_LIST_HEAD(&the_lnet.ln_peers); + INIT_LIST_HEAD(&the_lnet.ln_remote_peer_ni_list); INIT_LIST_HEAD(&the_lnet.ln_nets); INIT_LIST_HEAD(&the_lnet.ln_routers); INIT_LIST_HEAD(&the_lnet.ln_drop_rules); @@ -697,7 +698,7 @@ lnet_unprepare (void) lnet_res_container_cleanup(&the_lnet.ln_eq_container); lnet_msg_containers_destroy(); - lnet_peer_tables_destroy(); + lnet_peer_uninit(); lnet_rtrpools_free(0); if (the_lnet.ln_counters != NULL) { @@ -2401,13 +2402,33 @@ LNetCtl(unsigned int cmd, void *arg) return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg); } + case IOC_LIBCFS_ADD_PEER_NI: { + struct lnet_ioctl_peer_cfg *cfg = arg; + + if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg)) + return -EINVAL; + + return lnet_add_peer_ni_to_peer(cfg->prcfg_key_nid, + cfg->prcfg_cfg_nid); + } + + case IOC_LIBCFS_DEL_PEER_NI: { + struct lnet_ioctl_peer_cfg *cfg = arg; + + if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg)) + return -EINVAL; + + return lnet_del_peer_ni_from_peer(cfg->prcfg_key_nid, + cfg->prcfg_cfg_nid); + } + case IOC_LIBCFS_GET_PEER_INFO: { struct lnet_ioctl_peer *peer_info = arg; if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info)) return -EINVAL; - return lnet_get_peer_info( + return lnet_get_peer_ni_info( peer_info->pr_count, &peer_info->pr_nid, peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness, @@ -2420,6 +2441,20 @@ LNetCtl(unsigned int cmd, void *arg) &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob); } + case IOC_LIBCFS_GET_PEER_NI: { + struct lnet_ioctl_peer_cfg *cfg = arg; + struct lnet_peer_ni_credit_info *lpni_cri; + size_t total = sizeof(*cfg) + sizeof(*lpni_cri); + + if (cfg->prcfg_hdr.ioc_len < total) + return -EINVAL; + + lpni_cri = (struct 
lnet_peer_ni_credit_info*) cfg->prcfg_bulk; + + return lnet_get_peer_info(cfg->prcfg_idx, &cfg->prcfg_key_nid, + &cfg->prcfg_cfg_nid, lpni_cri); + } + case IOC_LIBCFS_NOTIFY_ROUTER: { unsigned long jiffies_passed; diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 769856c..8b36ca4 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -1487,7 +1487,7 @@ pick_peer: * received the message on if possible. If not, then pick * a peer_ni to send to */ - best_lpni = lnet_find_peer_ni_locked(dst_nid, cpt); + best_lpni = lnet_find_peer_ni_locked(dst_nid); if (best_lpni) { lnet_peer_ni_decref_locked(best_lpni); goto send; @@ -1527,7 +1527,7 @@ pick_peer: libcfs_nid2str(best_gw->lpni_nid), lnet_msgtyp2str(msg->msg_type), msg->msg_len); - best_lpni = lnet_find_peer_ni_locked(dst_nid, cpt); + best_lpni = lnet_find_peer_ni_locked(dst_nid); LASSERT(best_lpni != NULL); lnet_peer_ni_decref_locked(best_lpni); diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 0276756..4fd1e16 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -37,6 +37,42 @@ #include #include +static void +lnet_peer_remove_from_remote_list(struct lnet_peer_ni *lpni) +{ + if (!list_empty(&lpni->lpni_on_remote_peer_ni_list)) { + list_del_init(&lpni->lpni_on_remote_peer_ni_list); + lnet_peer_ni_decref_locked(lpni); + } +} + +static void +lnet_peer_tables_destroy(void) +{ + struct lnet_peer_table *ptable; + struct list_head *hash; + int i; + int j; + + if (!the_lnet.ln_peer_tables) + return; + + cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { + hash = ptable->pt_hash; + if (!hash) /* not intialized */ + break; + + ptable->pt_hash = NULL; + for (j = 0; j < LNET_PEER_HASH_SIZE; j++) + LASSERT(list_empty(&hash[j])); + + LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash)); + } + + cfs_percpt_free(the_lnet.ln_peer_tables); + the_lnet.ln_peer_tables = NULL; +} + int lnet_peer_tables_create(void) { @@ -69,31 +105,27 @@ lnet_peer_tables_create(void) return 0; } -void 
-lnet_peer_tables_destroy(void) +void lnet_peer_uninit() { - struct lnet_peer_table *ptable; - struct list_head *hash; - int i; - int j; - - if (the_lnet.ln_peer_tables == NULL) - return; - - cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - hash = ptable->pt_hash; - if (hash == NULL) /* not intialized */ - break; - - ptable->pt_hash = NULL; - for (j = 0; j < LNET_PEER_HASH_SIZE; j++) - LASSERT(list_empty(&hash[j])); - - LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash)); + int cpt; + struct lnet_peer_ni *lpni, *tmp; + struct lnet_peer_table *ptable = NULL; + + /* remove all peer_nis from the remote peer and he hash list */ + list_for_each_entry_safe(lpni, tmp, &the_lnet.ln_remote_peer_ni_list, + lpni_on_remote_peer_ni_list) { + list_del_init(&lpni->lpni_on_remote_peer_ni_list); + lnet_peer_ni_decref_locked(lpni); + + cpt = lnet_cpt_of_nid_locked(lpni->lpni_nid, NULL); + ptable = the_lnet.ln_peer_tables[cpt]; + ptable->pt_zombies++; + + list_del_init(&lpni->lpni_hashlist); + lnet_peer_ni_decref_locked(lpni); } - cfs_percpt_free(the_lnet.ln_peer_tables); - the_lnet.ln_peer_tables = NULL; + lnet_peer_tables_destroy(); } static void @@ -213,10 +245,13 @@ lnet_get_peer_ni_locked(struct lnet_peer_table *ptable, lnet_nid_t nid) } struct lnet_peer_ni * -lnet_find_peer_ni_locked(lnet_nid_t nid, int cpt) +lnet_find_peer_ni_locked(lnet_nid_t nid) { struct lnet_peer_ni *lpni; struct lnet_peer_table *ptable; + int cpt; + + cpt = lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER); ptable = the_lnet.ln_peer_tables[cpt]; lpni = lnet_get_peer_ni_locked(ptable, nid); @@ -229,7 +264,7 @@ lnet_find_or_create_peer_locked(lnet_nid_t dst_nid, int cpt, struct lnet_peer ** { struct lnet_peer_ni *lpni; - lpni = lnet_find_peer_ni_locked(dst_nid, cpt); + lpni = lnet_find_peer_ni_locked(dst_nid); if (!lpni) { int rc; rc = lnet_nid2peerni_locked(&lpni, dst_nid, cpt); @@ -244,6 +279,24 @@ lnet_find_or_create_peer_locked(lnet_nid_t dst_nid, int cpt, struct lnet_peer ** } struct lnet_peer_ni * 
+lnet_get_peer_ni_idx_locked(int idx, struct lnet_peer_net **lpn, + struct lnet_peer **lp) +{ + struct lnet_peer_ni *lpni; + + list_for_each_entry((*lp), &the_lnet.ln_peers, lp_on_lnet_peer_list) { + list_for_each_entry((*lpn), &((*lp)->lp_peer_nets), lpn_on_peer_list) { + list_for_each_entry(lpni, &((*lpn)->lpn_peer_nis), + lpni_on_peer_net_list) + if (idx-- == 0) + return lpni; + } + } + + return NULL; +} + +struct lnet_peer_ni * lnet_get_next_peer_ni_locked(struct lnet_peer *peer, struct lnet_peer_net *peer_net, struct lnet_peer_ni *prev) @@ -392,6 +445,224 @@ lnet_peer_get_net_locked(struct lnet_peer *peer, __u32 net_id) return NULL; } +/* + * given the key nid find the peer to add the new peer NID to. If the key + * nid is NULL, then create a new peer, but first make sure that the NID + * is unique + */ +int +lnet_add_peer_ni_to_peer(lnet_nid_t key_nid, lnet_nid_t nid) +{ + struct lnet_peer_ni *lpni, *lpni2; + struct lnet_peer *peer; + struct lnet_peer_net *peer_net, *pn; + int cpt, cpt2, rc; + struct lnet_peer_table *ptable = NULL; + __u32 net_id = LNET_NIDNET(nid); + + if (nid == LNET_NID_ANY) + return -EINVAL; + + /* check that nid is unique */ + cpt = lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER); + lnet_net_lock(cpt); + lpni = lnet_find_peer_ni_locked(nid); + if (lpni != NULL) { + lnet_peer_ni_decref_locked(lpni); + lnet_net_unlock(cpt); + return -EEXIST; + } + lnet_net_unlock(cpt); + + if (key_nid != LNET_NID_ANY) { + cpt2 = lnet_nid_cpt_hash(key_nid, LNET_CPT_NUMBER); + lnet_net_lock(cpt2); + lpni = lnet_find_peer_ni_locked(key_nid); + if (lpni == NULL) { + lnet_net_unlock(cpt2); + /* key_nid refers to a non-existant peer_ni.*/ + return -EINVAL; + } + peer = lpni->lpni_peer_net->lpn_peer; + peer->lp_multi_rail = true; + lnet_peer_ni_decref_locked(lpni); + lnet_net_unlock(cpt2); + } else { + lnet_net_lock(LNET_LOCK_EX); + rc = lnet_nid2peerni_locked(&lpni, nid, LNET_LOCK_EX); + if (rc == 0) { + lpni->lpni_peer_net->lpn_peer->lp_multi_rail = true; + 
lnet_peer_ni_decref_locked(lpni); + } + lnet_net_unlock(LNET_LOCK_EX); + return rc; + } + + lpni = NULL; + + LIBCFS_CPT_ALLOC(lpni, lnet_cpt_table(), cpt, sizeof(*lpni)); + if (lpni == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&lpni->lpni_txq); + INIT_LIST_HEAD(&lpni->lpni_rtrq); + INIT_LIST_HEAD(&lpni->lpni_routes); + INIT_LIST_HEAD(&lpni->lpni_hashlist); + INIT_LIST_HEAD(&lpni->lpni_on_peer_net_list); + INIT_LIST_HEAD(&lpni->lpni_on_remote_peer_ni_list); + + lpni->lpni_alive = !lnet_peers_start_down(); /* 1 bit!! */ + lpni->lpni_last_alive = cfs_time_current(); /* assumes alive */ + lpni->lpni_ping_feats = LNET_PING_FEAT_INVAL; + lpni->lpni_nid = nid; + lpni->lpni_cpt = cpt; + lnet_set_peer_ni_health_locked(lpni, true); + + /* allocate here in case we need to add a new peer_net */ + peer_net = NULL; + LIBCFS_ALLOC(peer_net, sizeof(*peer_net)); + if (peer_net == NULL) { + rc = -ENOMEM; + if (lpni != NULL) + LIBCFS_FREE(lpni, sizeof(*lpni)); + return rc; + } + + lnet_net_lock(LNET_LOCK_EX); + + ptable = the_lnet.ln_peer_tables[cpt]; + ptable->pt_number++; + + lpni2 = lnet_find_peer_ni_locked(nid); + if (lpni2 != NULL) { + lnet_peer_ni_decref_locked(lpni2); + /* sanity check that lpni2's peer is what we expect */ + if (lpni2->lpni_peer_net->lpn_peer != peer) + rc = -EEXIST; + else + rc = -EINVAL; + + ptable->pt_number--; + /* another thread has already added it */ + lnet_net_unlock(LNET_LOCK_EX); + LIBCFS_FREE(peer_net, sizeof(*peer_net)); + return rc; + } + + lpni->lpni_net = lnet_get_net_locked(LNET_NIDNET(lpni->lpni_nid)); + if (lpni->lpni_net != NULL) { + lpni->lpni_txcredits = + lpni->lpni_mintxcredits = lpni->lpni_net->net_peertxcredits; + lpni->lpni_rtrcredits = + lpni->lpni_minrtrcredits = lnet_peer_buffer_credits(lpni->lpni_net); + } else { + /* + * if you're adding a peer which is not on a local network + * then we can't assign any of the credits. It won't be + * picked for sending anyway. 
Eventually a network can be + * added, in this case we need to revisit this peer and + * update its credits. + */ + + /* increment refcount for remote peer list */ + atomic_inc(&lpni->lpni_refcount); + list_add_tail(&lpni->lpni_on_remote_peer_ni_list, + &the_lnet.ln_remote_peer_ni_list); + } + + /* increment refcount for peer on hash list */ + atomic_inc(&lpni->lpni_refcount); + + list_add_tail(&lpni->lpni_hashlist, + &ptable->pt_hash[lnet_nid2peerhash(nid)]); + ptable->pt_version++; + + /* add the lpni to a net */ + list_for_each_entry(pn, &peer->lp_peer_nets, lpn_on_peer_list) { + if (pn->lpn_net_id == net_id) { + list_add_tail(&lpni->lpni_on_peer_net_list, + &pn->lpn_peer_nis); + lpni->lpni_peer_net = pn; + lnet_net_unlock(LNET_LOCK_EX); + LIBCFS_FREE(peer_net, sizeof(*peer_net)); + return 0; + } + } + + INIT_LIST_HEAD(&peer_net->lpn_on_peer_list); + INIT_LIST_HEAD(&peer_net->lpn_peer_nis); + + /* build the hierarchy */ + peer_net->lpn_net_id = net_id; + peer_net->lpn_peer = peer; + lpni->lpni_peer_net = peer_net; + list_add_tail(&lpni->lpni_on_peer_net_list, &peer_net->lpn_peer_nis); + list_add_tail(&peer_net->lpn_on_peer_list, &peer->lp_peer_nets); + + lnet_net_unlock(LNET_LOCK_EX); + return 0; +} + +int +lnet_del_peer_ni_from_peer(lnet_nid_t key_nid, lnet_nid_t nid) +{ + int cpt; + lnet_nid_t local_nid; + struct lnet_peer *peer; + struct lnet_peer_ni *lpni, *lpni2; + struct lnet_peer_table *ptable = NULL; + + if (key_nid == LNET_NID_ANY) + return -EINVAL; + + local_nid = (nid != LNET_NID_ANY) ? 
nid : key_nid; + cpt = lnet_nid_cpt_hash(local_nid, LNET_CPT_NUMBER); + lnet_net_lock(LNET_LOCK_EX); + + lpni = lnet_find_peer_ni_locked(local_nid); + if (lpni == NULL) { + lnet_net_unlock(LNET_LOCK_EX); /* pairs with lnet_net_lock(LNET_LOCK_EX) above */ + return -EINVAL; + } + lnet_peer_ni_decref_locked(lpni); + + peer = lpni->lpni_peer_net->lpn_peer; + LASSERT(peer != NULL); + + if (peer->lp_primary_nid == lpni->lpni_nid) { + /* + * deleting the primary ni is equivalent to deleting the + * entire peer + */ + lpni = NULL; + lpni = lnet_get_next_peer_ni_locked(peer, NULL, lpni); + while (lpni != NULL) { + lpni2 = lnet_get_next_peer_ni_locked(peer, NULL, lpni); + cpt = lnet_nid_cpt_hash(lpni->lpni_nid, + LNET_CPT_NUMBER); + lnet_peer_remove_from_remote_list(lpni); + ptable = the_lnet.ln_peer_tables[cpt]; + ptable->pt_zombies++; + list_del_init(&lpni->lpni_hashlist); + lnet_peer_ni_decref_locked(lpni); + lpni = lpni2; + } + lnet_net_unlock(LNET_LOCK_EX); + + return 0; + } + + lnet_peer_remove_from_remote_list(lpni); + cpt = lnet_nid_cpt_hash(lpni->lpni_nid, LNET_CPT_NUMBER); + ptable = the_lnet.ln_peer_tables[cpt]; + ptable->pt_zombies++; + list_del_init(&lpni->lpni_hashlist); + lnet_peer_ni_decref_locked(lpni); + lnet_net_unlock(LNET_LOCK_EX); + + return 0; +} + void lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lpni) { @@ -477,6 +748,9 @@ lnet_nid2peerni_locked(struct lnet_peer_ni **lpnip, lnet_nid_t nid, int cpt) INIT_LIST_HEAD(&lpni->lpni_txq); INIT_LIST_HEAD(&lpni->lpni_rtrq); INIT_LIST_HEAD(&lpni->lpni_routes); + INIT_LIST_HEAD(&lpni->lpni_hashlist); + INIT_LIST_HEAD(&lpni->lpni_on_peer_net_list); + INIT_LIST_HEAD(&lpni->lpni_on_remote_peer_ni_list); lpni->lpni_alive = !lnet_peers_start_down(); /* 1 bit!! 
*/ lpni->lpni_last_alive = cfs_time_current(); /* assumes alive */ @@ -511,8 +785,20 @@ lnet_nid2peerni_locked(struct lnet_peer_ni **lpnip, lnet_nid_t nid, int cpt) lpni->lpni_minrtrcredits = lnet_peer_buffer_credits(lpni->lpni_net); } else { + /* + * if you're adding a peer which is not on a local network + * then we can't assign any of the credits. It won't be + * picked for sending anyway. Eventually a network can be + * added, in this case we need to revisit this peer and + * update its credits. + */ + CDEBUG(D_NET, "peer_ni %s is not directly connected\n", libcfs_nid2str(nid)); + /* increment refcount for remote peer list */ + atomic_inc(&lpni->lpni_refcount); + list_add_tail(&lpni->lpni_on_remote_peer_ni_list, + &the_lnet.ln_remote_peer_ni_list); } lnet_set_peer_ni_health_locked(lpni, true); @@ -573,12 +859,12 @@ lnet_debug_peer(lnet_nid_t nid) lnet_net_unlock(cpt); } -int lnet_get_peer_info(__u32 peer_index, __u64 *nid, - char aliveness[LNET_MAX_STR_LEN], - __u32 *cpt_iter, __u32 *refcount, - __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits, - __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits, - __u32 *peer_tx_qnob) +int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid, + char aliveness[LNET_MAX_STR_LEN], + __u32 *cpt_iter, __u32 *refcount, + __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits, + __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits, + __u32 *peer_tx_qnob) { struct lnet_peer_table *peer_table; struct lnet_peer_ni *lp; @@ -635,3 +921,34 @@ int lnet_get_peer_info(__u32 peer_index, __u64 *nid, return found ? 
0 : -ENOENT; } + +int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid, + struct lnet_peer_ni_credit_info *peer_ni_info) +{ + struct lnet_peer_ni *lpni = NULL; + struct lnet_peer_net *lpn = NULL; + struct lnet_peer *lp = NULL; + + lpni = lnet_get_peer_ni_idx_locked(idx, &lpn, &lp); + + if (!lpni) + return -ENOENT; + + *primary_nid = lp->lp_primary_nid; + *nid = lpni->lpni_nid; + snprintf(peer_ni_info->cr_aliveness, LNET_MAX_STR_LEN, "NA"); + if (lnet_isrouter(lpni) || + lnet_peer_aliveness_enabled(lpni)) + snprintf(peer_ni_info->cr_aliveness, LNET_MAX_STR_LEN, + lpni->lpni_alive ? "up" : "down"); + + peer_ni_info->cr_refcount = atomic_read(&lpni->lpni_refcount); + peer_ni_info->cr_ni_peer_tx_credits = (lpni->lpni_net != NULL) ? + lpni->lpni_net->net_peertxcredits : 0; + peer_ni_info->cr_peer_tx_credits = lpni->lpni_txcredits; + peer_ni_info->cr_peer_rtr_credits = lpni->lpni_rtrcredits; + peer_ni_info->cr_peer_min_rtr_credits = lpni->lpni_minrtrcredits; /* report the rtr minimum, not lpni_mintxcredits */ + peer_ni_info->cr_peer_tx_qnob = lpni->lpni_txqnob; + + return 0; +} diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 252a548..ad97f6c 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -1763,7 +1763,7 @@ lnet_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive, cfs_time_t when) return -ESHUTDOWN; } - lp = lnet_find_peer_ni_locked(nid, cpt); + lp = lnet_find_peer_ni_locked(nid); if (lp == NULL) { /* nid not found */ lnet_net_unlock(cpt); diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index 3e50f81..edfd5d5 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -92,6 +92,185 @@ int lustre_lnet_config_ni_system(bool up, bool load_ni_from_mod, return rc; } +static lnet_nid_t *allocate_create_nid_array(char **nids, char *err_str) +{ + lnet_nid_t *array = NULL; + int idx = 0; + + if (!nids) { + snprintf(err_str, LNET_MAX_STR_LEN, "no NIDs to add"); + return NULL; + } + + /* count the size of the 
array */ + while (nids[idx] != NULL) + idx++; + + array = calloc(idx + 1, sizeof(*array)); /* one extra slot for the LNET_NID_ANY terminator */ + if (array == NULL) { + snprintf(err_str, LNET_MAX_STR_LEN, "out of memory"); + return NULL; + } + + idx = 0; + while (nids[idx] != NULL) { + array[idx] = libcfs_str2nid(nids[idx]); + if (array[idx] == LNET_NID_ANY) { + free(array); + snprintf(err_str, LNET_MAX_STR_LEN, + "bad NID: '%s'", + nids[idx]); + return NULL; + } + idx++; + } + + /* identify last entry */ + array[idx] = LNET_NID_ANY; + + return array; +} + +static int dispatch_peer_ni_cmd(lnet_nid_t knid, lnet_nid_t nid, __u32 cmd, + struct lnet_ioctl_peer_cfg *data, + char *err_str, char *cmd_str) +{ + int rc; + + data->prcfg_key_nid = knid; + data->prcfg_cfg_nid = nid; + + rc = l_ioctl(LNET_DEV_ID, cmd, data); + if (rc != 0) { + rc = -errno; + snprintf(err_str, + LNET_MAX_STR_LEN, + "\"cannot %s peer ni: %s\"", + (cmd_str) ? cmd_str : "add", strerror(errno)); + } + + return rc; +} + +int lustre_lnet_config_peer_nid(char *knid, char **nid, int seq_no, + struct cYAML **err_rc) +{ + struct lnet_ioctl_peer_cfg data; + lnet_nid_t key_nid = LNET_NID_ANY; + int rc = LUSTRE_CFG_RC_NO_ERR; + int idx = 0; + char err_str[LNET_MAX_STR_LEN] = {0}; + lnet_nid_t *nids = allocate_create_nid_array(nid, err_str); + + if (knid != NULL) { + key_nid = libcfs_str2nid(knid); + if (key_nid == LNET_NID_ANY) { + snprintf(err_str, sizeof(err_str), + "bad key NID: '%s'", + knid); + rc = LUSTRE_CFG_RC_MISSING_PARAM; + goto out; + } + } else if (nids[0] == LNET_NID_ANY) { + snprintf(err_str, sizeof(err_str), + "no NIDs provided for configuration"); + rc = LUSTRE_CFG_RC_MISSING_PARAM; + goto out; + } else { + key_nid = LNET_NID_ANY; + } + + snprintf(err_str, sizeof(err_str), "\"Success\""); + + LIBCFS_IOC_INIT_V2(data, prcfg_hdr); + if (nids[0] == LNET_NID_ANY) { + rc = dispatch_peer_ni_cmd(LNET_NID_ANY, key_nid, + IOC_LIBCFS_ADD_PEER_NI, + &data, err_str, "add"); + goto out; + } + + while (nids[idx] != LNET_NID_ANY) { + /* + * If key_nid is 
not provided then the first nid in the + * list becomes the key_nid. First time round the loop use + * LNET_NID_ANY for the first parameter, then use nid[0] + * as the key nid after wards + */ + rc = dispatch_peer_ni_cmd(key_nid, nids[idx], + IOC_LIBCFS_ADD_PEER_NI, &data, + err_str, "add"); + + if (rc != 0) + goto out; + + if (idx == 0 && key_nid == LNET_NID_ANY) + key_nid = nids[0]; + + idx++; + } + +out: + if (nids != NULL) + free(nids); + cYAML_build_error(rc, seq_no, ADD_CMD, "peer_ni", err_str, err_rc); + return rc; +} + +int lustre_lnet_del_peer_nid(char *knid, char **nid, int seq_no, + struct cYAML **err_rc) +{ + struct lnet_ioctl_peer_cfg data; + lnet_nid_t key_nid; + int rc = LUSTRE_CFG_RC_NO_ERR; + int idx = 0; + char err_str[LNET_MAX_STR_LEN] = {0}; + lnet_nid_t *nids = allocate_create_nid_array(nid, err_str); + + if (knid == NULL) { + snprintf(err_str, sizeof(err_str), + "\"Primary nid is not provided\""); + rc = LUSTRE_CFG_RC_MISSING_PARAM; + goto out; + } else { + key_nid = libcfs_str2nid(knid); + if (key_nid == LNET_NID_ANY) { + rc = LUSTRE_CFG_RC_BAD_PARAM; + snprintf(err_str, sizeof(err_str), + "bad key NID: '%s'", + knid); + goto out; + } + } + + snprintf(err_str, sizeof(err_str), "\"Success\""); + + LIBCFS_IOC_INIT_V2(data, prcfg_hdr); + if (nids[0] == LNET_NID_ANY) { + rc = dispatch_peer_ni_cmd(key_nid, LNET_NID_ANY, + IOC_LIBCFS_DEL_PEER_NI, + &data, err_str, "del"); + goto out; + } + + while (nids[idx] != LNET_NID_ANY) { + rc = dispatch_peer_ni_cmd(key_nid, nids[idx], + IOC_LIBCFS_DEL_PEER_NI, &data, + err_str, "del"); + + if (rc != 0) + goto out; + + idx++; + } + +out: + if (nids != NULL) + free(nids); + cYAML_build_error(rc, seq_no, DEL_CMD, "peer_ni", err_str, err_rc); + return rc; +} + int lustre_lnet_config_route(char *nw, char *gw, int hops, int prio, int seq_no, struct cYAML **err_rc) { @@ -1002,20 +1181,28 @@ out: return rc; } -int lustre_lnet_show_peer_credits(int seq_no, struct cYAML **show_rc, - struct cYAML **err_rc) +int 
lustre_lnet_show_peer(char *knid, int seq_no, struct cYAML **show_rc, + struct cYAML **err_rc) { - struct lnet_ioctl_peer peer_info; + struct lnet_ioctl_peer_cfg *peer_info; + struct lnet_peer_ni_credit_info *lpni_cri; int rc = LUSTRE_CFG_RC_OUT_OF_MEM, ncpt = 0, i = 0, j = 0; int l_errno = 0; - struct cYAML *root = NULL, *peer = NULL, *first_seq = NULL, - *peer_root = NULL; + struct cYAML *root = NULL, *peer = NULL, *peer_ni = NULL, + *first_seq = NULL, *peer_root = NULL, *tmp = NULL; char err_str[LNET_MAX_STR_LEN]; - bool ncpt_set = false; + lnet_nid_t prev_primary_nid = LNET_NID_ANY, primary_nid = LNET_NID_ANY; + char *data = calloc(sizeof(*peer_info) + sizeof(*lpni_cri), 1); + bool new_peer = true; snprintf(err_str, sizeof(err_str), "\"out of memory\""); + if (data == NULL) + goto out; + + peer_info = (struct lnet_ioctl_peer_cfg *)data; + /* create struct cYAML root object */ root = cYAML_create_object(NULL, NULL); if (root == NULL) @@ -1025,81 +1212,95 @@ int lustre_lnet_show_peer_credits(int seq_no, struct cYAML **show_rc, if (peer_root == NULL) goto out; + if (knid != NULL) + primary_nid = libcfs_str2nid(knid); + do { for (i = 0;; i++) { - LIBCFS_IOC_INIT_V2(peer_info, pr_hdr); - peer_info.pr_count = i; - peer_info.pr_lnd_u.pr_peer_credits.cr_ncpt = j; + memset(data, 0, sizeof(*peer_info) + sizeof(*lpni_cri)); + LIBCFS_IOC_INIT_V2(*peer_info, prcfg_hdr); + peer_info->prcfg_hdr.ioc_len = sizeof(*peer_info) + + sizeof(*lpni_cri); + peer_info->prcfg_idx = i; + rc = l_ioctl(LNET_DEV_ID, - IOC_LIBCFS_GET_PEER_INFO, &peer_info); + IOC_LIBCFS_GET_PEER_NI, peer_info); if (rc != 0) { l_errno = errno; break; } - if (ncpt_set != 0) { - ncpt = peer_info.pr_lnd_u.pr_peer_credits. 
- cr_ncpt; - ncpt_set = true; - } + if (primary_nid != LNET_NID_ANY && + primary_nid != peer_info->prcfg_key_nid) + continue; + + lpni_cri = (struct lnet_peer_ni_credit_info*)peer_info->prcfg_bulk; peer = cYAML_create_seq_item(peer_root); if (peer == NULL) goto out; + if (peer_info->prcfg_key_nid != prev_primary_nid) { + prev_primary_nid = peer_info->prcfg_key_nid; + new_peer = true; + } + + if (new_peer) { + lnet_nid_t pnid = peer_info->prcfg_key_nid; + if (cYAML_create_string(peer, "primary nid", + libcfs_nid2str(pnid)) + == NULL) + goto out; + tmp = cYAML_create_seq(peer, "peer ni"); + if (tmp == NULL) + goto out; + new_peer = false; + } + if (first_seq == NULL) first_seq = peer; - if (cYAML_create_string(peer, "nid", + peer_ni = cYAML_create_seq_item(tmp); + if (peer_ni == NULL) + goto out; + + if (cYAML_create_string(peer_ni, "nid", libcfs_nid2str - (peer_info.pr_nid)) == NULL) + (peer_info->prcfg_cfg_nid)) + == NULL) goto out; - if (cYAML_create_string(peer, "state", - peer_info.pr_lnd_u. - pr_peer_credits. - cr_aliveness) == - NULL) + if (cYAML_create_string(peer_ni, "state", + lpni_cri->cr_aliveness) + == NULL) goto out; - if (cYAML_create_number(peer, "refcount", - peer_info.pr_lnd_u. - pr_peer_credits. - cr_refcount) == NULL) + if (cYAML_create_number(peer_ni, "refcount", + lpni_cri->cr_refcount) == NULL) goto out; - if (cYAML_create_number(peer, "max_ni_tx_credits", - peer_info.pr_lnd_u. - pr_peer_credits. - cr_ni_peer_tx_credits) + if (cYAML_create_number(peer_ni, "max_ni_tx_credits", + lpni_cri->cr_ni_peer_tx_credits) == NULL) goto out; - if (cYAML_create_number(peer, "available_tx_credits", - peer_info.pr_lnd_u. - pr_peer_credits. - cr_peer_tx_credits) + if (cYAML_create_number(peer_ni, "available_tx_credits", + lpni_cri->cr_peer_tx_credits) == NULL) goto out; - if (cYAML_create_number(peer, "available_rtr_credits", - peer_info.pr_lnd_u. - pr_peer_credits. 
- cr_peer_rtr_credits) + if (cYAML_create_number(peer_ni, "available_rtr_credits", + lpni_cri->cr_peer_rtr_credits) == NULL) goto out; - if (cYAML_create_number(peer, "min_rtr_credits", - peer_info.pr_lnd_u. - pr_peer_credits. - cr_peer_min_rtr_credits) + if (cYAML_create_number(peer_ni, "min_rtr_credits", + lpni_cri->cr_peer_min_rtr_credits) == NULL) goto out; - if (cYAML_create_number(peer, "tx_q_num_of_buf", - peer_info.pr_lnd_u. - pr_peer_credits. - cr_peer_tx_qnob) + if (cYAML_create_number(peer_ni, "tx_q_num_of_buf", + lpni_cri->cr_peer_tx_qnob) == NULL) goto out; } @@ -1132,7 +1333,7 @@ out: * insert one. Otherwise add to the one there */ show_node = cYAML_get_object_item(*show_rc, - "peer_credits"); + "peer"); if (show_node != NULL && cYAML_is_sequence(show_node)) { cYAML_insert_child(show_node, first_seq); free(peer_root); @@ -1148,7 +1349,7 @@ out: *show_rc = root; } - cYAML_build_error(rc, seq_no, SHOW_CMD, "peer_credits", err_str, + cYAML_build_error(rc, seq_no, SHOW_CMD, "peer", err_str, err_rc); return rc; @@ -1348,6 +1549,114 @@ ignore_child: err_rc); } +static void yaml_free_string_array(char **str_array, int num) +{ + int i; + + for (i = 0; i < num; i++) + free(str_array[i]); + free(str_array); +} + +static int yaml_copy_peer_nids(struct cYAML *tree, char ***nidsppp) +{ + struct cYAML *nids_entry = NULL, *child; + char **nids = NULL; + int num = 0, rc = LUSTRE_CFG_RC_NO_ERR; + + nids_entry = cYAML_get_object_item(tree, "nids"); + if (nids_entry != NULL) { + /* count */ + child = nids_entry->cy_child; + while (child != NULL) { + num++; + child = child->cy_next; + } + + if (num == 0) + return LUSTRE_CFG_RC_MISSING_PARAM; + + nids = calloc(sizeof(*nids) * num, 1); + if (nids == NULL) + return LUSTRE_CFG_RC_OUT_OF_MEM; + + /* now grab all the nids */ + child = nids_entry->cy_child; + num = 0; + while (child != NULL) { + nids[num] = calloc(strlen(child->cy_valuestring) + 1, + 1); + if (nids[num] == NULL) { + rc = LUSTRE_CFG_RC_OUT_OF_MEM; + goto 
failed; + } + strncpy(nids[num], child->cy_valuestring, + strlen(child->cy_valuestring)); + child = child->cy_next; + num++; + } + rc = num; + } else { + rc = LUSTRE_CFG_RC_MISSING_PARAM; + goto failed; + } + + *nidsppp = nids; + return rc; + +failed: + if (nids != NULL) + yaml_free_string_array(nids, num); + *nidsppp = NULL; + return rc; +} + +static int handle_yaml_config_peer(struct cYAML *tree, struct cYAML **show_rc, + struct cYAML **err_rc) +{ + char **nids = NULL; + int num, rc; + struct cYAML *seq_no, *key_nid; + + num = yaml_copy_peer_nids(tree, &nids); + if (num < 0) + return num; + + seq_no = cYAML_get_object_item(tree, "seq_no"); + key_nid = cYAML_get_object_item(tree, "key_nid"); + + rc = lustre_lnet_config_peer_nid((key_nid) ? key_nid->cy_valuestring : NULL, + nids, + (seq_no) ? seq_no->cy_valueint : -1, + err_rc); + + yaml_free_string_array(nids, num); + return rc; +} + +static int handle_yaml_del_peer(struct cYAML *tree, struct cYAML **show_rc, + struct cYAML **err_rc) +{ + char **nids = NULL; + int num, rc; + struct cYAML *seq_no, *key_nid; + + num = yaml_copy_peer_nids(tree, &nids); + if (num < 0) + return num; + + seq_no = cYAML_get_object_item(tree, "seq_no"); + key_nid = cYAML_get_object_item(tree, "key_nid"); + + rc = lustre_lnet_del_peer_nid((key_nid) ? key_nid->cy_valuestring : NULL, + nids, + (seq_no) ? seq_no->cy_valueint : -1, + err_rc); + + yaml_free_string_array(nids, num); + return rc; +} + static int handle_yaml_config_buffers(struct cYAML *tree, struct cYAML **show_rc, struct cYAML **err_rc) @@ -1488,13 +1797,14 @@ static int handle_yaml_show_routing(struct cYAML *tree, struct cYAML **show_rc, static int handle_yaml_show_credits(struct cYAML *tree, struct cYAML **show_rc, struct cYAML **err_rc) { - struct cYAML *seq_no; + struct cYAML *seq_no, *key_nid; seq_no = cYAML_get_object_item(tree, "seq_no"); + key_nid = cYAML_get_object_item(tree, "key_nid"); - return lustre_lnet_show_peer_credits((seq_no) ? 
- seq_no->cy_valueint : -1, - show_rc, err_rc); + return lustre_lnet_show_peer((key_nid) ? key_nid->cy_valuestring : NULL, + (seq_no) ? seq_no->cy_valueint : -1, + show_rc, err_rc); } static int handle_yaml_show_stats(struct cYAML *tree, struct cYAML **show_rc, @@ -1516,6 +1826,7 @@ struct lookup_cmd_hdlr_tbl { static struct lookup_cmd_hdlr_tbl lookup_config_tbl[] = { {"route", handle_yaml_config_route}, {"net", handle_yaml_config_net}, + {"peer", handle_yaml_config_peer}, {"routing", handle_yaml_config_routing}, {"buffers", handle_yaml_config_buffers}, {NULL, NULL} @@ -1524,6 +1835,7 @@ static struct lookup_cmd_hdlr_tbl lookup_config_tbl[] = { static struct lookup_cmd_hdlr_tbl lookup_del_tbl[] = { {"route", handle_yaml_del_route}, {"net", handle_yaml_del_net}, + {"peer", handle_yaml_del_peer}, {"routing", handle_yaml_del_routing}, {NULL, NULL} }; diff --git a/lnet/utils/lnetconfig/liblnetconfig.h b/lnet/utils/lnetconfig/liblnetconfig.h index a051220..38dce8c 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.h +++ b/lnet/utils/lnetconfig/liblnetconfig.h @@ -196,27 +196,66 @@ int lustre_lnet_show_routing(int seq_no, struct cYAML **show_rc, struct cYAML **err_rc); /* - * lustre_lnet_show_peer_credits - * Shows credit details on the peers in the system + * lustre_lnet_show_stats + * Shows internal LNET statistics. This is useful to display the + * current LNET activity, such as number of messages route, etc * * seq_no - sequence number of the command * show_rc - YAML structure of the resultant show * err_rc - YAML strucutre of the resultant return code. */ -int lustre_lnet_show_peer_credits(int seq_no, struct cYAML **show_rc, - struct cYAML **err_rc); +int lustre_lnet_show_stats(int seq_no, struct cYAML **show_rc, + struct cYAML **err_rc); /* - * lustre_lnet_show_stats - * Shows internal LNET statistics. 
This is useful to display the - * current LNET activity, such as number of messages route, etc + * lustre_lnet_config_peer_nid + * Add a peer nid to a peer identified by knid. If no knid is given + * then the first nid in the nid list becomes the primary nid for + * a newly created peer. + * Otherwise if knid is provided and it's unique then a new peer is + * created with knid as the primary NID and the nids in the nid list as + * secondary nids. + * If any of the peer nids provided, with the exception of the knid, is + * not unique the operation fails. Some peer nids might have already + * been added. It's the role of the caller of this API to remove the + * added NIDs if they wish. * + * knid - Key NID of the peer + * nid - list of nids to add + * seq_no - sequence number of the command + * err_rc - YAML structure of the resultant return code. + */ +int lustre_lnet_config_peer_nid(char *knid, char **nid, int seq_no, + struct cYAML **err_rc); + +/* + * lustre_lnet_del_peer_nid + * Delete the nids identified in the nid list from the peer identified by + * knid. If knid is NULL or it doesn't identify a peer the operation + * fails and no change happens to the system. + * The operation is aborted on the first NID that fails to be deleted. + * + * knid - Key NID of the peer + * nid - list of nids to delete + * seq_no - sequence number of the command + * err_rc - YAML structure of the resultant return code. + */ +int lustre_lnet_del_peer_nid(char *knid, char **nid, int seq_no, + struct cYAML **err_rc); + +/* + * lustre_lnet_show_peer + * Show the peer identified by knid. If knid is NULL all peers in the + * system are shown. + * + * knid - Key NID of the peer * seq_no - sequence number of the command * show_rc - YAML structure of the resultant show * err_rc - YAML strucutre of the resultant return code. 
+ * */ -int lustre_lnet_show_stats(int seq_no, struct cYAML **show_rc, - struct cYAML **err_rc); +int lustre_lnet_show_peer(char *knid, int seq_no, struct cYAML **show_rc, + struct cYAML **err_rc); /* * lustre_yaml_config diff --git a/lnet/utils/lnetctl.c b/lnet/utils/lnetctl.c index b407daf..e3f0621 100644 --- a/lnet/utils/lnetctl.c +++ b/lnet/utils/lnetctl.c @@ -47,10 +47,13 @@ static int jt_show_route(int argc, char **argv); static int jt_show_net(int argc, char **argv); static int jt_show_routing(int argc, char **argv); static int jt_show_stats(int argc, char **argv); -static int jt_show_peer_credits(int argc, char **argv); +static int jt_show_peer(int argc, char **argv); static int jt_set_tiny(int argc, char **argv); static int jt_set_small(int argc, char **argv); static int jt_set_large(int argc, char **argv); +static int jt_add_peer_nid(int argc, char **argv); +static int jt_del_peer_nid(int argc, char **argv); +/*static int jt_show_peer(int argc, char **argv);*/ command_t lnet_cmds[] = { {"configure", jt_config_lnet, 0, "configure lnet\n" @@ -105,11 +108,6 @@ command_t stats_cmds[] = { { 0, 0, 0, NULL } }; -command_t credits_cmds[] = { - {"show", jt_show_peer_credits, 0, "show peer credits\n"}, - { 0, 0, 0, NULL } -}; - command_t set_cmds[] = { {"tiny_buffers", jt_set_tiny, 0, "set tiny routing buffers\n" "\tVALUE must be greater than 0\n"}, @@ -123,6 +121,21 @@ command_t set_cmds[] = { { 0, 0, 0, NULL } }; +command_t peer_cmds[] = { + {"add", jt_add_peer_nid, 0, "add a peer NID\n" + "\t--key_nid: NID to identify peer. If not provided then the first\n" + "\t NID in the list becomes the key NID of a newly created\n" + "\t peer. \n" + "\t--nid: one or more peer NIDs\n"}, + {"del", jt_del_peer_nid, 0, "delete a peer NID\n" + "\t--key_nid: NID to identify peer.\n" + "\t--nid: list of NIDs to remove. 
If none provided,\n" + "\t peer is deleted\n"}, + {"show", jt_show_peer, 0, "show peer credits\n" + "\t--key_nid: NID of peer to filter on.\n"}, + { 0, 0, 0, NULL } +}; + static inline void print_help(const command_t cmds[], const char *cmd_type, const char *pc_name) { @@ -729,27 +742,6 @@ static int jt_show_stats(int argc, char **argv) return rc; } -static int jt_show_peer_credits(int argc, char **argv) -{ - int rc; - struct cYAML *show_rc = NULL, *err_rc = NULL; - - if (handle_help(credits_cmds, "peer_credits", "show", argc, argv) == 0) - return 0; - - rc = lustre_lnet_show_peer_credits(-1, &show_rc, &err_rc); - - if (rc != LUSTRE_CFG_RC_NO_ERR) - cYAML_print_tree2file(stderr, err_rc); - else if (show_rc) - cYAML_print_tree(show_rc); - - cYAML_free_tree(err_rc); - cYAML_free_tree(show_rc); - - return rc; -} - static inline int jt_lnet(int argc, char **argv) { if (argc < 2) @@ -810,16 +802,16 @@ static inline int jt_stats(int argc, char **argv) return Parser_execarg(argc - 1, &argv[1], stats_cmds); } -static inline int jt_peer_credits(int argc, char **argv) +static inline int jt_peers(int argc, char **argv) { if (argc < 2) return CMD_HELP; if (argc == 2 && - handle_help(credits_cmds, "peer_credits", NULL, argc, argv) == 0) + handle_help(peer_cmds, "peer", NULL, argc, argv) == 0) return 0; - return Parser_execarg(argc - 1, &argv[1], credits_cmds); + return Parser_execarg(argc - 1, &argv[1], peer_cmds); } static inline int jt_set(int argc, char **argv) @@ -966,6 +958,172 @@ static int jt_export(int argc, char **argv) return 0; } +static int jt_add_peer_nid(int argc, char **argv) +{ + char *key_nid = NULL; + char *nid[LNET_MAX_INTERFACES] = {NULL}; + int idx = 0; + struct cYAML *err_rc = NULL; + int rc, opt; + + const char *const short_options = "k:n:h"; + const struct option long_options[] = { + { "key_nid", 1, NULL, 'k' }, + { "nid", 1, NULL, 'n' }, + { "help", 0, NULL, 'h' }, + { NULL, 0, NULL, 0 }, + }; + + while ((opt = getopt_long(argc, argv, short_options, 
+ long_options, NULL)) != -1) { + switch (opt) { + case 'k': + key_nid = optarg; + break; + case 'n': + if (idx >= LNET_MAX_INTERFACES) { + cYAML_build_error(-1, -1, "peer_ni", "add", + "too many interfaces", + &err_rc); + rc = LUSTRE_CFG_RC_BAD_PARAM; + goto failed; + } + nid[idx] = calloc(strlen(optarg) + 1, 1); + if (nid[idx] == NULL) { + cYAML_build_error(-1, -1, "peer_ni", "add", + "out of memory", + &err_rc); + rc = LUSTRE_CFG_RC_OUT_OF_MEM; + goto failed; + } + strncpy(nid[idx], optarg, strlen(optarg)); + idx++; + break; + case 'h': + print_help(peer_cmds, "peer", "add"); + return 0; + default: + return 0; + } + } + + rc = lustre_lnet_config_peer_nid(key_nid, nid, -1, &err_rc); + +failed: + idx = 0; + while (idx < LNET_MAX_INTERFACES && nid[idx] != NULL) { + free(nid[idx]); + idx++; + } + + if (rc != LUSTRE_CFG_RC_NO_ERR) + cYAML_print_tree2file(stderr, err_rc); + + cYAML_free_tree(err_rc); + + return rc; +} + +static int jt_del_peer_nid(int argc, char **argv) +{ + char *key_nid = NULL; + char *nid[LNET_MAX_INTERFACES] = {NULL}; + int idx = 0; + struct cYAML *err_rc = NULL; + int rc, opt; + + const char *const short_options = "k:n:h"; + const struct option long_options[] = { + { "key_nid", 1, NULL, 'k' }, + { "nid", 1, NULL, 'n' }, + { "help", 0, NULL, 'h' }, + { NULL, 0, NULL, 0 }, + }; + + while ((opt = getopt_long(argc, argv, short_options, + long_options, NULL)) != -1) { + switch (opt) { + case 'k': + key_nid = optarg; + break; + case 'n': + if (idx >= LNET_MAX_INTERFACES) { + cYAML_build_error(-1, -1, "peer_ni", "del", + "too many interfaces", + &err_rc); + rc = LUSTRE_CFG_RC_BAD_PARAM; + goto failed; + } + nid[idx] = calloc(strlen(optarg) + 1, 1); + if (nid[idx] == NULL) { + cYAML_build_error(-1, -1, "peer_ni", "del", + "out of memory", + &err_rc); + rc = LUSTRE_CFG_RC_OUT_OF_MEM; + goto failed; + } + strncpy(nid[idx], optarg, strlen(optarg)); + idx++; + break; + case 'h': + print_help(peer_cmds, "peer", "del"); + return 0; + default: + return 0; + } + } + + rc = 
lustre_lnet_del_peer_nid(key_nid, nid, -1, &err_rc); + +failed: + if (rc != LUSTRE_CFG_RC_NO_ERR) + cYAML_print_tree2file(stderr, err_rc); + + cYAML_free_tree(err_rc); + + return rc; +} + +static int jt_show_peer(int argc, char **argv) +{ + char *key_nid = NULL; + int rc, opt; + struct cYAML *err_rc = NULL, *show_rc = NULL; + + const char *const short_options = "k:vh"; + const struct option long_options[] = { + { "key_nid", 1, NULL, 'k' }, + { "help", 0, NULL, 'h' }, + { NULL, 0, NULL, 0 }, + }; + + while ((opt = getopt_long(argc, argv, short_options, + long_options, NULL)) != -1) { + switch (opt) { + case 'k': + key_nid = optarg; + break; + case 'h': + print_help(peer_cmds, "peer", "show"); + return 0; + default: + return 0; + } + } + + rc = lustre_lnet_show_peer(key_nid, -1, &show_rc, &err_rc); + + if (rc != LUSTRE_CFG_RC_NO_ERR) + cYAML_print_tree2file(stderr, err_rc); + else if (show_rc) + cYAML_print_tree(show_rc); + + cYAML_free_tree(err_rc); + cYAML_free_tree(show_rc); + + return rc; +} + command_t list[] = { {"lnet", jt_lnet, 0, "lnet {configure | unconfigure} [--all]"}, {"route", jt_route, 0, "route {add | del | show | help}"}, @@ -977,7 +1135,7 @@ command_t list[] = { "--help} FILE.yaml"}, {"export", jt_export, 0, "export {--help} FILE.yaml"}, {"stats", jt_stats, 0, "stats {show | help}"}, - {"peer_credits", jt_peer_credits, 0, "peer_credits {show | help}"}, + {"peer", jt_peers, 0, "peer {add | del | show | help}"}, {"help", Parser_help, 0, "help"}, {"exit", Parser_quit, 0, "quit"}, {"quit", Parser_quit, 0, "quit"}, -- 1.8.3.1