From d062d03aa913f7fcabd603ad6de27d3ed086f6df Mon Sep 17 00:00:00 2001 From: Olaf Weber Date: Fri, 27 Jan 2017 16:16:16 +0100 Subject: [PATCH] LU-9480 lnet: add struct lnet_ping_buffer The Multi-Rail code will use the ping target buffer also as the source of data to push to other nodes. This means that there will be multiple MDs referencing the same buffer, and care must be taken to ensure that the buffer is not freed while any such reference remains. Encapsulate the struct lnet_ping_info (aka lnet_ping_info_t) in a struct lnet_ping_buffer. This adds a reference count, and the number of NIDs for the encapsulated lnet_ping_info has been sized. For sizing the buffer the constant LNET_PINGINFO_SIZE is replaced with LNET_PING_INFO_SIZE(NNIS). Test-Parameters: trivial Signed-off-by: Olaf Weber Change-Id: Iae255a7ebd6099c050bddbea84fb1923a586ac66 Reviewed-on: https://review.whamcloud.com/25773 Reviewed-by: Olaf Weber Reviewed-by: Amir Shehata Tested-by: Amir Shehata --- lnet/include/lnet/lib-lnet.h | 22 ++- lnet/include/lnet/lib-types.h | 42 ++++- lnet/lnet/api-ni.c | 345 ++++++++++++++++++++++++------------------ lnet/lnet/router.c | 95 ++++++------ 4 files changed, 301 insertions(+), 203 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 4d7cc9e..0331222 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -789,7 +789,27 @@ int lnet_peer_buffer_credits(struct lnet_net *net); int lnet_router_checker_start(void); void lnet_router_checker_stop(void); void lnet_router_ni_update_locked(struct lnet_peer_ni *gw, __u32 net); -void lnet_swap_pinginfo(struct lnet_ping_info *info); +void lnet_swap_pinginfo(struct lnet_ping_buffer *pbuf); + +int lnet_ping_info_validate(struct lnet_ping_info *pinfo); +struct lnet_ping_buffer *lnet_ping_buffer_alloc(int nnis, gfp_t gfp); +void lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf); + +static inline void lnet_ping_buffer_addref(struct lnet_ping_buffer *pbuf) +{ + atomic_inc(&pbuf->pb_refcnt); +} + +static inline void lnet_ping_buffer_decref(struct lnet_ping_buffer *pbuf) +{ + if (atomic_dec_and_test(&pbuf->pb_refcnt)) + lnet_ping_buffer_free(pbuf); +} + +static inline int lnet_ping_buffer_numref(struct lnet_ping_buffer *pbuf) +{ + return atomic_read(&pbuf->pb_refcnt); +} int lnet_parse_ip2nets(char **networksp, char *ip2nets); int lnet_parse_routes(char *route_str, int *im_a_router); diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 1797641..9c072b9 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -417,9 +417,6 @@ typedef struct lnet_ni { #define LNET_PING_FEAT_NI_STATUS (1 << 1) /* return NI status */ #define LNET_PING_FEAT_RTE_DISABLED (1 << 2) /* Routing enabled */ -#define LNET_PING_FEAT_MASK (LNET_PING_FEAT_BASE | \ - LNET_PING_FEAT_NI_STATUS) - typedef struct lnet_ping_info { __u32 pi_magic; __u32 pi_features; @@ -428,15 +425,38 @@ typedef struct lnet_ping_info { struct lnet_ni_status pi_ni[0]; } WIRE_ATTR lnet_ping_info_t; +#define LNET_PING_INFO_SIZE(NNIDS) \ + offsetof(struct lnet_ping_info, pi_ni[NNIDS]) +#define LNET_PING_INFO_LONI(PINFO) ((PINFO)->pi_ni[0].ns_nid) +#define LNET_PING_INFO_SEQNO(PINFO) ((PINFO)->pi_ni[0].ns_status) + +/* + * Descriptor of a ping info buffer: keep a separate indicator of the + * size and a reference count. The type is used both as a source and + * sink of data, so we need to keep some information outside of the + * area that may be overwritten by network data. + */ +struct lnet_ping_buffer { + int pb_nnis; + atomic_t pb_refcnt; + struct lnet_ping_info pb_info; +}; + +#define LNET_PING_BUFFER_SIZE(NNIDS) \ + offsetof(struct lnet_ping_buffer, pb_info.pi_ni[NNIDS]) +#define LNET_PING_BUFFER_LONI(PBUF) ((PBUF)->pb_info.pi_ni[0].ns_nid) +#define LNET_PING_BUFFER_SEQNO(PBUF) ((PBUF)->pb_info.pi_ni[0].ns_status) + + /* router checker data, per router */ -#define LNET_MAX_RTR_NIS 16 -#define LNET_PINGINFO_SIZE offsetof(struct lnet_ping_info, pi_ni[LNET_MAX_RTR_NIS]) +#define LNET_MAX_RTR_NIS LNET_INTERFACES_MIN +#define LNET_RTR_PINGINFO_SIZE LNET_PING_INFO_SIZE(LNET_MAX_RTR_NIS) typedef struct lnet_rc_data { /* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */ struct list_head rcd_list; struct lnet_handle_md rcd_mdh; /* ping buffer MD */ struct lnet_peer_ni *rcd_gateway; /* reference to gateway */ - struct lnet_ping_info *rcd_pinginfo; /* ping buffer */ + struct lnet_ping_buffer *rcd_pingbuffer;/* ping buffer */ } lnet_rc_data_t; struct lnet_peer_ni { @@ -797,9 +817,17 @@ typedef struct lnet { /* percpt router buffer pools */ struct lnet_rtrbufpool **ln_rtrpools; + /* + * Ping target / Push source + * + * The ping target and push source share a single buffer. The + * ln_ping_target is protected against concurrent updates by + * ln_api_mutex. + */ struct lnet_handle_md ln_ping_target_md; struct lnet_handle_eq ln_ping_target_eq; - struct lnet_ping_info *ln_ping_info; + struct lnet_ping_buffer *ln_ping_target; + atomic_t ln_ping_target_seqno; /* router checker startup/shutdown state */ int ln_rc_state; diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 8e20d36..0bdcbad 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -961,25 +961,44 @@ lnet_count_acceptor_nets(void) return count; } -static struct lnet_ping_info * -lnet_ping_info_create(int num_ni) +struct lnet_ping_buffer * +lnet_ping_buffer_alloc(int nnis, gfp_t gfp) { - struct lnet_ping_info *ping_info; - unsigned int infosz; + struct lnet_ping_buffer *pbuf; - infosz = offsetof(struct lnet_ping_info, pi_ni[num_ni]); - LIBCFS_ALLOC(ping_info, infosz); - if (ping_info == NULL) { - CERROR("Can't allocate ping info[%d]\n", num_ni); + LIBCFS_ALLOC_GFP(pbuf, LNET_PING_BUFFER_SIZE(nnis), gfp); + if (pbuf) { + pbuf->pb_nnis = nnis; + atomic_set(&pbuf->pb_refcnt, 1); + } + + return pbuf; +} + +void +lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf) +{ + LASSERT(lnet_ping_buffer_numref(pbuf) == 0); + LIBCFS_FREE(pbuf, LNET_PING_BUFFER_SIZE(pbuf->pb_nnis)); +} + +static struct lnet_ping_buffer * +lnet_ping_target_create(int nnis) +{ + struct lnet_ping_buffer *pbuf; + + pbuf = lnet_ping_buffer_alloc(nnis, GFP_NOFS); + if (pbuf == NULL) { + CERROR("Can't allocate ping source [%d]\n", nnis); return NULL; } - ping_info->pi_nnis = num_ni; - ping_info->pi_pid = the_lnet.ln_pid; - ping_info->pi_magic = LNET_PROTO_PING_MAGIC; - ping_info->pi_features = LNET_PING_FEAT_NI_STATUS; + pbuf->pb_info.pi_nnis = nnis; + pbuf->pb_info.pi_pid = the_lnet.ln_pid; + pbuf->pb_info.pi_magic = LNET_PROTO_PING_MAGIC; + pbuf->pb_info.pi_features = LNET_PING_FEAT_NI_STATUS; - return ping_info; + return pbuf; } static inline int @@ -1025,16 +1044,25 @@ lnet_get_ni_count(void) return count; } -static inline void -lnet_ping_info_free(struct lnet_ping_info *pinfo) +int +lnet_ping_info_validate(struct lnet_ping_info *pinfo) { - LIBCFS_FREE(pinfo, - offsetof(struct lnet_ping_info, - pi_ni[pinfo->pi_nnis])); + if (!pinfo) + return -EINVAL; + if (pinfo->pi_magic != LNET_PROTO_PING_MAGIC) + return -EPROTO; + if (!(pinfo->pi_features & LNET_PING_FEAT_NI_STATUS)) + return -EPROTO; + /* Loopback is guaranteed to be present */ + if (pinfo->pi_nnis < 1 || pinfo->pi_nnis > lnet_interfaces_max) + return -ERANGE; + if (LNET_NETTYP(LNET_NIDNET(LNET_PING_INFO_LONI(pinfo))) != LOLND) + return -EPROTO; + return 0; } static void -lnet_ping_info_destroy(void) +lnet_ping_target_destroy(void) { struct lnet_net *net; struct lnet_ni *ni; @@ -1049,25 +1077,25 @@ lnet_ping_info_destroy(void) } } - lnet_ping_info_free(the_lnet.ln_ping_info); - the_lnet.ln_ping_info = NULL; + lnet_ping_buffer_decref(the_lnet.ln_ping_target); + the_lnet.ln_ping_target = NULL; lnet_net_unlock(LNET_LOCK_EX); } static void -lnet_ping_event_handler(struct lnet_event *event) +lnet_ping_target_event_handler(struct lnet_event *event) { - struct lnet_ping_info *pinfo = event->md.user_ptr; + struct lnet_ping_buffer *pbuf = event->md.user_ptr; if (event->unlinked) - pinfo->pi_features = LNET_PING_FEAT_INVAL; + lnet_ping_buffer_decref(pbuf); } static int -lnet_ping_info_setup(struct lnet_ping_info **ppinfo, - struct lnet_handle_md *md_handle, - int ni_count, bool set_eq) +lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf, + struct lnet_handle_md *ping_mdh, + int ni_count, bool set_eq) { struct lnet_process_id id = { .nid = LNET_NID_ANY, @@ -1078,72 +1106,76 @@ lnet_ping_info_setup(struct lnet_ping_info **ppinfo, int rc, rc2; if (set_eq) { - rc = LNetEQAlloc(0, lnet_ping_event_handler, + rc = LNetEQAlloc(0, lnet_ping_target_event_handler, &the_lnet.ln_ping_target_eq); if (rc != 0) { - CERROR("Can't allocate ping EQ: %d\n", rc); + CERROR("Can't allocate ping buffer EQ: %d\n", rc); return rc; } } - *ppinfo = lnet_ping_info_create(ni_count); - if (*ppinfo == NULL) { + *ppbuf = lnet_ping_target_create(ni_count); + if (*ppbuf == NULL) { rc = -ENOMEM; - goto failed_0; + goto fail_free_eq; } + /* Ping target ME/MD */ rc = LNetMEAttach(LNET_RESERVED_PORTAL, id, LNET_PROTO_PING_MATCHBITS, 0, LNET_UNLINK, LNET_INS_AFTER, &me_handle); if (rc != 0) { - CERROR("Can't create ping ME: %d\n", rc); - goto failed_1; + CERROR("Can't create ping target ME: %d\n", rc); + goto fail_decref_ping_buffer; } /* initialize md content */ - md.start = *ppinfo; - md.length = offsetof(struct lnet_ping_info, - pi_ni[(*ppinfo)->pi_nnis]); + md.start = &(*ppbuf)->pb_info; + md.length = LNET_PING_INFO_SIZE((*ppbuf)->pb_nnis); md.threshold = LNET_MD_THRESH_INF; md.max_size = 0; md.options = LNET_MD_OP_GET | LNET_MD_TRUNCATE | LNET_MD_MANAGE_REMOTE; - md.user_ptr = NULL; md.eq_handle = the_lnet.ln_ping_target_eq; - md.user_ptr = *ppinfo; + md.user_ptr = *ppbuf; - rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle); + rc = LNetMDAttach(me_handle, md, LNET_RETAIN, ping_mdh); if (rc != 0) { - CERROR("Can't attach ping MD: %d\n", rc); - goto failed_2; + CERROR("Can't attach ping target MD: %d\n", rc); + goto fail_unlink_ping_me; } + lnet_ping_buffer_addref(*ppbuf); return 0; -failed_2: +fail_unlink_ping_me: rc2 = LNetMEUnlink(me_handle); LASSERT(rc2 == 0); -failed_1: - lnet_ping_info_free(*ppinfo); - *ppinfo = NULL; -failed_0: - if (set_eq) - LNetEQFree(the_lnet.ln_ping_target_eq); +fail_decref_ping_buffer: + LASSERT(lnet_ping_buffer_numref(*ppbuf) == 1); + lnet_ping_buffer_decref(*ppbuf); + *ppbuf = NULL; +fail_free_eq: + if (set_eq) { + rc2 = LNetEQFree(the_lnet.ln_ping_target_eq); + LASSERT(rc2 == 0); + } return rc; } static void -lnet_ping_md_unlink(struct lnet_ping_info *pinfo, struct lnet_handle_md *md_handle) +lnet_ping_md_unlink(struct lnet_ping_buffer *pbuf, + struct lnet_handle_md *ping_mdh) { sigset_t blocked = cfs_block_allsigs(); - LNetMDUnlink(*md_handle); - LNetInvalidateMDHandle(md_handle); + LNetMDUnlink(*ping_mdh); + LNetInvalidateMDHandle(ping_mdh); - /* NB md could be busy; this just starts the unlink */ - while (pinfo->pi_features != LNET_PING_FEAT_INVAL) { - CDEBUG(D_NET, "Still waiting for ping MD to unlink\n"); + /* NB the MD could be busy; this just starts the unlink */ + while (lnet_ping_buffer_numref(pbuf) > 1) { + CDEBUG(D_NET, "Still waiting for ping data MD to unlink\n"); set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(cfs_time_seconds(1)); } @@ -1152,62 +1184,74 @@ lnet_ping_md_unlink(struct lnet_ping_info *pinfo, struct lnet_handle_md *md_hand } static void -lnet_ping_info_install_locked(struct lnet_ping_info *ping_info) +lnet_ping_target_install_locked(struct lnet_ping_buffer *pbuf) { - int i; struct lnet_ni *ni; struct lnet_net *net; struct lnet_ni_status *ns; + int i; + int rc; i = 0; list_for_each_entry(net, &the_lnet.ln_nets, net_list) { list_for_each_entry(ni, &net->net_ni_list, ni_netlist) { - LASSERT(i < ping_info->pi_nnis); + LASSERT(i < pbuf->pb_nnis); - ns = &ping_info->pi_ni[i]; + ns = &pbuf->pb_info.pi_ni[i]; ns->ns_nid = ni->ni_nid; lnet_ni_lock(ni); ns->ns_status = (ni->ni_status != NULL) ? - ni->ni_status->ns_status : + ni->ni_status->ns_status : LNET_NI_STATUS_UP; ni->ni_status = ns; lnet_ni_unlock(ni); i++; } - } + /* + * We (ab)use the ns_status of the loopback interface to + * transmit the sequence number. The first interface listed + * must be the loopback interface. + */ + rc = lnet_ping_info_validate(&pbuf->pb_info); + if (rc) { + LCONSOLE_EMERG("Invalid ping target: %d\n", rc); + LBUG(); + } + LNET_PING_BUFFER_SEQNO(pbuf) = + atomic_inc_return(&the_lnet.ln_ping_target_seqno); } static void -lnet_ping_target_update(struct lnet_ping_info *pinfo, - struct lnet_handle_md md_handle) +lnet_ping_target_update(struct lnet_ping_buffer *pbuf, + struct lnet_handle_md ping_mdh) { - struct lnet_ping_info *old_pinfo = NULL; - struct lnet_handle_md old_md; + struct lnet_ping_buffer *old_pbuf = NULL; + struct lnet_handle_md old_ping_md; /* switch the NIs to point to the new ping info created */ lnet_net_lock(LNET_LOCK_EX); if (!the_lnet.ln_routing) - pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED; - lnet_ping_info_install_locked(pinfo); + pbuf->pb_info.pi_features |= LNET_PING_FEAT_RTE_DISABLED; + lnet_ping_target_install_locked(pbuf); - if (the_lnet.ln_ping_info != NULL) { - old_pinfo = the_lnet.ln_ping_info; - old_md = the_lnet.ln_ping_target_md; + if (the_lnet.ln_ping_target) { + old_pbuf = the_lnet.ln_ping_target; + old_ping_md = the_lnet.ln_ping_target_md; } - the_lnet.ln_ping_target_md = md_handle; - the_lnet.ln_ping_info = pinfo; + the_lnet.ln_ping_target_md = ping_mdh; + the_lnet.ln_ping_target = pbuf; lnet_net_unlock(LNET_LOCK_EX); - if (old_pinfo != NULL) { - /* unlink the old ping info */ - lnet_ping_md_unlink(old_pinfo, &old_md); - lnet_ping_info_free(old_pinfo); + if (old_pbuf) { + /* unlink and free the old ping info */ + lnet_ping_md_unlink(old_pbuf, &old_ping_md); + lnet_ping_buffer_decref(old_pbuf); } } @@ -1216,13 +1260,13 @@ lnet_ping_target_fini(void) { int rc; - lnet_ping_md_unlink(the_lnet.ln_ping_info, + lnet_ping_md_unlink(the_lnet.ln_ping_target, &the_lnet.ln_ping_target_md); rc = LNetEQFree(the_lnet.ln_ping_target_eq); LASSERT(rc == 0); - lnet_ping_info_destroy(); + lnet_ping_target_destroy(); } static int @@ -1816,8 +1860,8 @@ LNetNIInit(lnet_pid_t requested_pid) int im_a_router = 0; int rc; int ni_count; - struct lnet_ping_info *pinfo; - struct lnet_handle_md md_handle; + struct lnet_ping_buffer *pbuf; + struct lnet_handle_md ping_mdh; struct list_head net_head; struct lnet_net *net; @@ -1892,11 +1936,11 @@ LNetNIInit(lnet_pid_t requested_pid) the_lnet.ln_refcount = 1; /* Now I may use my own API functions... */ - rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true); + rc = lnet_ping_target_setup(&pbuf, &ping_mdh, ni_count, true); if (rc != 0) goto err_acceptor_stop; - lnet_ping_target_update(pinfo, md_handle); + lnet_ping_target_update(pbuf, ping_mdh); rc = lnet_router_checker_start(); if (rc != 0) @@ -2006,7 +2050,10 @@ lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni, } cfg_ni->lic_nid = ni->ni_nid; - cfg_ni->lic_status = ni->ni_status->ns_status; + if (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND) + cfg_ni->lic_status = LNET_NI_STATUS_UP; + else + cfg_ni->lic_status = ni->ni_status->ns_status; cfg_ni->lic_tcp_bonding = use_tcp_bonding; cfg_ni->lic_dev_cpt = ni->ni_dev_cpt; @@ -2091,7 +2138,10 @@ lnet_fill_ni_info_legacy(struct lnet_ni *ni, config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_net->net_tunables.lct_peer_rtr_credits; - net_config->ni_status = ni->ni_status->ns_status; + if (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND) + net_config->ni_status = LNET_NI_STATUS_UP; + else + net_config->ni_status = ni->ni_status->ns_status; if (ni->ni_cpts) { int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT); @@ -2242,8 +2292,8 @@ static int lnet_add_net_common(struct lnet_net *net, struct lnet_ioctl_config_lnd_tunables *tun) { __u32 net_id; - struct lnet_ping_info *pinfo; - struct lnet_handle_md md_handle; + struct lnet_ping_buffer *pbuf; + struct lnet_handle_md ping_mdh; int rc; struct lnet_remotenet *rnet; int net_ni_count; @@ -2265,7 +2315,7 @@ static int lnet_add_net_common(struct lnet_net *net, /* * make sure you calculate the correct number of slots in the ping - * info. Since the ping info is a flattened list of all the NIs, + * buffer. Since the ping info is a flattened list of all the NIs, * we should allocate enough slots to accomodate the number of NIs * which will be added. * @@ -2274,9 +2324,9 @@ static int lnet_add_net_common(struct lnet_net *net, */ net_ni_count = lnet_get_net_ni_count_pre(net); - rc = lnet_ping_info_setup(&pinfo, &md_handle, - net_ni_count + lnet_get_ni_count(), - false); + rc = lnet_ping_target_setup(&pbuf, &ping_mdh, + net_ni_count + lnet_get_ni_count(), + false); if (rc < 0) { lnet_net_free(net); return rc; @@ -2327,13 +2377,13 @@ static int lnet_add_net_common(struct lnet_net *net, lnet_peer_net_added(net); lnet_net_unlock(LNET_LOCK_EX); - lnet_ping_target_update(pinfo, md_handle); + lnet_ping_target_update(pbuf, ping_mdh); return 0; failed: - lnet_ping_md_unlink(pinfo, &md_handle); - lnet_ping_info_free(pinfo); + lnet_ping_md_unlink(pbuf, &ping_mdh); + lnet_ping_buffer_decref(pbuf); return rc; } @@ -2424,8 +2474,8 @@ int lnet_dyn_del_ni(struct lnet_ioctl_config_ni *conf) struct lnet_net *net; struct lnet_ni *ni; __u32 net_id = LNET_NIDNET(conf->lic_nid); - struct lnet_ping_info *pinfo; - struct lnet_handle_md md_handle; + struct lnet_ping_buffer *pbuf; + struct lnet_handle_md ping_mdh; int rc; int net_count; __u32 addr; @@ -2443,7 +2493,7 @@ int lnet_dyn_del_ni(struct lnet_ioctl_config_ni *conf) CERROR("net %s not found\n", libcfs_net2str(net_id)); rc = -ENOENT; - goto net_unlock; + goto unlock_net; } addr = LNET_NIDADDR(conf->lic_nid); @@ -2454,28 +2504,28 @@ int lnet_dyn_del_ni(struct lnet_ioctl_config_ni *conf) lnet_net_unlock(0); /* create and link a new ping info, before removing the old one */ - rc = lnet_ping_info_setup(&pinfo, &md_handle, + rc = lnet_ping_target_setup(&pbuf, &ping_mdh, lnet_get_ni_count() - net_count, false); if (rc != 0) - goto out; + goto unlock_api_mutex; lnet_shutdown_lndnet(net); if (lnet_count_acceptor_nets() == 0) lnet_acceptor_stop(); - lnet_ping_target_update(pinfo, md_handle); + lnet_ping_target_update(pbuf, ping_mdh); - goto out; + goto unlock_api_mutex; } ni = lnet_nid2ni_locked(conf->lic_nid, 0); if (!ni) { - CERROR("nid %s not found \n", + CERROR("nid %s not found\n", libcfs_nid2str(conf->lic_nid)); rc = -ENOENT; - goto net_unlock; + goto unlock_net; } net_count = lnet_get_net_ni_count_locked(net); @@ -2483,27 +2533,27 @@ int lnet_dyn_del_ni(struct lnet_ioctl_config_ni *conf) lnet_net_unlock(0); /* create and link a new ping info, before removing the old one */ - rc = lnet_ping_info_setup(&pinfo, &md_handle, + rc = lnet_ping_target_setup(&pbuf, &ping_mdh, lnet_get_ni_count() - 1, false); if (rc != 0) - goto out; + goto unlock_api_mutex; lnet_shutdown_lndni(ni); if (lnet_count_acceptor_nets() == 0) lnet_acceptor_stop(); - lnet_ping_target_update(pinfo, md_handle); + lnet_ping_target_update(pbuf, ping_mdh); /* check if the net is empty and remove it if it is */ if (net_count == 1) lnet_shutdown_lndnet(net); - goto out; + goto unlock_api_mutex; -net_unlock: +unlock_net: lnet_net_unlock(0); -out: +unlock_api_mutex: mutex_unlock(&the_lnet.ln_api_mutex); return rc; @@ -2571,8 +2621,8 @@ int lnet_dyn_del_net(__u32 net_id) { struct lnet_net *net; - struct lnet_ping_info *pinfo; - struct lnet_handle_md md_handle; + struct lnet_ping_buffer *pbuf; + struct lnet_handle_md ping_mdh; int rc; int net_ni_count; @@ -2596,8 +2646,8 @@ lnet_dyn_del_net(__u32 net_id) lnet_net_unlock(0); /* create and link a new ping info, before removing the old one */ - rc = lnet_ping_info_setup(&pinfo, &md_handle, - lnet_get_ni_count() - net_ni_count, false); + rc = lnet_ping_target_setup(&pbuf, &ping_mdh, + lnet_get_ni_count() - net_ni_count, false); if (rc != 0) goto out; @@ -2606,7 +2656,7 @@ lnet_dyn_del_net(__u32 net_id) if (lnet_count_acceptor_nets() == 0) lnet_acceptor_stop(); - lnet_ping_target_update(pinfo, md_handle); + lnet_ping_target_update(pbuf, ping_mdh); out: mutex_unlock(&the_lnet.ln_api_mutex); @@ -3044,8 +3094,7 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout, int unlinked = 0; int replied = 0; const signed long a_long_time = msecs_to_jiffies(60 * MSEC_PER_SEC); - int infosz; - struct lnet_ping_info *info; + struct lnet_ping_buffer *pbuf; struct lnet_process_id tmpid; int i; int nob; @@ -3053,8 +3102,6 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout, int rc2; sigset_t blocked; - infosz = offsetof(struct lnet_ping_info, pi_ni[n_ids]); - /* n_ids limit is arbitrary */ if (n_ids <= 0 || n_ids > lnet_interfaces_max || id.nid == LNET_NID_ANY) return -EINVAL; @@ -3062,20 +3109,20 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout, if (id.pid == LNET_PID_ANY) id.pid = LNET_PID_LUSTRE; - LIBCFS_ALLOC(info, infosz); - if (info == NULL) + pbuf = lnet_ping_buffer_alloc(n_ids, GFP_NOFS); + if (!pbuf) return -ENOMEM; /* NB 2 events max (including any unlink event) */ rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh); if (rc != 0) { CERROR("Can't allocate EQ: %d\n", rc); - goto out_0; + goto fail_ping_buffer_decref; } /* initialize md content */ - md.start = info; - md.length = infosz; + md.start = &pbuf->pb_info; + md.length = LNET_PING_INFO_SIZE(n_ids); md.threshold = 2; /*GET/REPLY*/ md.max_size = 0; md.options = LNET_MD_TRUNCATE; @@ -3085,7 +3132,7 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout, rc = LNetMDBind(md, LNET_UNLINK, &mdh); if (rc != 0) { CERROR("Can't bind MD: %d\n", rc); - goto out_1; + goto fail_free_eq; } rc = LNetGet(LNET_NID_ANY, mdh, id, @@ -3150,11 +3197,11 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout, CWARN("%s: Unexpected rc >= 0 but no reply!\n", libcfs_id2str(id)); rc = -EIO; - goto out_1; + goto fail_free_eq; } nob = rc; - LASSERT(nob >= 0 && nob <= infosz); + LASSERT(nob >= 0 && nob <= LNET_PING_INFO_SIZE(n_ids)); rc = -EPROTO; /* if I can't parse... */ @@ -3162,56 +3209,56 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout, /* can't check magic/version */ CERROR("%s: ping info too short %d\n", libcfs_id2str(id), nob); - goto out_1; + goto fail_free_eq; } - if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) { - lnet_swap_pinginfo(info); - } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) { + if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) { + lnet_swap_pinginfo(pbuf); + } else if (pbuf->pb_info.pi_magic != LNET_PROTO_PING_MAGIC) { CERROR("%s: Unexpected magic %08x\n", - libcfs_id2str(id), info->pi_magic); - goto out_1; + libcfs_id2str(id), pbuf->pb_info.pi_magic); + goto fail_free_eq; } - if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) { + if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_NI_STATUS) == 0) { CERROR("%s: ping w/o NI status: 0x%x\n", - libcfs_id2str(id), info->pi_features); - goto out_1; + libcfs_id2str(id), pbuf->pb_info.pi_features); + goto fail_free_eq; } - if (nob < offsetof(struct lnet_ping_info, pi_ni[0])) { + if (nob < LNET_PING_INFO_SIZE(0)) { CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id), - nob, (int)offsetof(struct lnet_ping_info, pi_ni[0])); - goto out_1; + nob, (int)LNET_PING_INFO_SIZE(0)); + goto fail_free_eq; } - if (info->pi_nnis < n_ids) - n_ids = info->pi_nnis; + if (pbuf->pb_info.pi_nnis < n_ids) + n_ids = pbuf->pb_info.pi_nnis; - if (nob < offsetof(struct lnet_ping_info, pi_ni[n_ids])) { + if (nob < LNET_PING_INFO_SIZE(n_ids)) { CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id), - nob, (int)offsetof(struct lnet_ping_info, pi_ni[n_ids])); - goto out_1; + nob, (int)LNET_PING_INFO_SIZE(n_ids)); + goto fail_free_eq; } rc = -EFAULT; /* If I SEGV... */ memset(&tmpid, 0, sizeof(tmpid)); for (i = 0; i < n_ids; i++) { - tmpid.pid = info->pi_pid; - tmpid.nid = info->pi_ni[i].ns_nid; + tmpid.pid = pbuf->pb_info.pi_pid; + tmpid.nid = pbuf->pb_info.pi_ni[i].ns_nid; if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) - goto out_1; + goto fail_free_eq; } - rc = info->pi_nnis; + rc = pbuf->pb_info.pi_nnis; - out_1: + fail_free_eq: rc2 = LNetEQFree(eqh); if (rc2 != 0) CERROR("rc2 %d\n", rc2); LASSERT(rc2 == 0); - out_0: - LIBCFS_FREE(info, infosz); + fail_ping_buffer_decref: + lnet_ping_buffer_decref(pbuf); return rc; } diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index aba5699..aa12eb7 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -650,17 +650,21 @@ lnet_get_route(int idx, __u32 *net, __u32 *hops, } void -lnet_swap_pinginfo(struct lnet_ping_info *info) +lnet_swap_pinginfo(struct lnet_ping_buffer *pbuf) { - int i; struct lnet_ni_status *stat; + int nnis; + int i; - __swab32s(&info->pi_magic); - __swab32s(&info->pi_features); - __swab32s(&info->pi_pid); - __swab32s(&info->pi_nnis); - for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) { - stat = &info->pi_ni[i]; + __swab32s(&pbuf->pb_info.pi_magic); + __swab32s(&pbuf->pb_info.pi_features); + __swab32s(&pbuf->pb_info.pi_pid); + __swab32s(&pbuf->pb_info.pi_nnis); + nnis = pbuf->pb_info.pi_nnis; + if (nnis > pbuf->pb_nnis) + nnis = pbuf->pb_nnis; + for (i = 0; i < nnis; i++) { + stat = &pbuf->pb_info.pi_ni[i]; __swab64s(&stat->ns_nid); __swab32s(&stat->ns_status); } @@ -674,11 +678,12 @@ lnet_swap_pinginfo(struct lnet_ping_info *info) static void lnet_parse_rc_info(struct lnet_rc_data *rcd) { - struct lnet_ping_info *info = rcd->rcd_pinginfo; + struct lnet_ping_buffer *pbuf = rcd->rcd_pingbuffer; struct lnet_peer_ni *gw = rcd->rcd_gateway; struct lnet_route *rte; + int nnis; - if (!gw->lpni_alive) + if (!gw->lpni_alive || !pbuf) return; /* @@ -687,51 +692,48 @@ lnet_parse_rc_info(struct lnet_rc_data *rcd) */ spin_lock(&gw->lpni_lock); - if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) - lnet_swap_pinginfo(info); + if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) + lnet_swap_pinginfo(pbuf); /* NB always racing with network! */ - if (info->pi_magic != LNET_PROTO_PING_MAGIC) { + if (pbuf->pb_info.pi_magic != LNET_PROTO_PING_MAGIC) { CDEBUG(D_NET, "%s: Unexpected magic %08x\n", - libcfs_nid2str(gw->lpni_nid), info->pi_magic); + libcfs_nid2str(gw->lpni_nid), pbuf->pb_info.pi_magic); gw->lpni_ping_feats = LNET_PING_FEAT_INVAL; - spin_unlock(&gw->lpni_lock); - return; + goto out; } - gw->lpni_ping_feats = info->pi_features; - if ((gw->lpni_ping_feats & LNET_PING_FEAT_MASK) == 0) { - CDEBUG(D_NET, "%s: Unexpected features 0x%x\n", - libcfs_nid2str(gw->lpni_nid), gw->lpni_ping_feats); - spin_unlock(&gw->lpni_lock); - return; /* nothing I can understand */ - } + gw->lpni_ping_feats = pbuf->pb_info.pi_features; - if ((gw->lpni_ping_feats & LNET_PING_FEAT_NI_STATUS) == 0) { - spin_unlock(&gw->lpni_lock); - return; /* can't carry NI status info */ - } + /* Without NI status info there's nothing more to do. */ + if ((gw->lpni_ping_feats & LNET_PING_FEAT_NI_STATUS) == 0) + goto out; + + /* Determine the number of NIs for which there is data. */ + nnis = pbuf->pb_info.pi_nnis; + if (pbuf->pb_nnis < nnis) + nnis = pbuf->pb_nnis; list_for_each_entry(rte, &gw->lpni_routes, lr_gwlist) { int down = 0; int up = 0; int i; + /* If routing disabled then the route is down. */ if ((gw->lpni_ping_feats & LNET_PING_FEAT_RTE_DISABLED) != 0) { rte->lr_downis = 1; continue; } - for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) { - struct lnet_ni_status *stat = &info->pi_ni[i]; + for (i = 0; i < nnis; i++) { + lnet_ni_status_t *stat = &pbuf->pb_info.pi_ni[i]; lnet_nid_t nid = stat->ns_nid; if (nid == LNET_NID_ANY) { CDEBUG(D_NET, "%s: unexpected LNET_NID_ANY\n", libcfs_nid2str(gw->lpni_nid)); gw->lpni_ping_feats = LNET_PING_FEAT_INVAL; - spin_unlock(&gw->lpni_lock); - return; + goto out; } if (LNET_NETTYP(LNET_NIDNET(nid)) == LOLND) @@ -753,8 +755,7 @@ lnet_parse_rc_info(struct lnet_rc_data *rcd) CDEBUG(D_NET, "%s: Unexpected status 0x%x\n", libcfs_nid2str(gw->lpni_nid), stat->ns_status); gw->lpni_ping_feats = LNET_PING_FEAT_INVAL; - spin_unlock(&gw->lpni_lock); - return; + goto out; } if (up) { /* ignore downed NIs if NI for dest network is up */ @@ -768,7 +769,7 @@ lnet_parse_rc_info(struct lnet_rc_data *rcd) rte->lr_downis = down; } - +out: spin_unlock(&gw->lpni_lock); } @@ -932,8 +933,8 @@ lnet_destroy_rc_data(struct lnet_rc_data *rcd) lnet_net_unlock(cpt); } - if (rcd->rcd_pinginfo != NULL) - LIBCFS_FREE(rcd->rcd_pinginfo, LNET_PINGINFO_SIZE); + if (rcd->rcd_pingbuffer != NULL) + lnet_ping_buffer_decref(rcd->rcd_pingbuffer); LIBCFS_FREE(rcd, sizeof(*rcd)); } @@ -942,7 +943,7 @@ static struct lnet_rc_data * lnet_create_rc_data_locked(struct lnet_peer_ni *gateway) { struct lnet_rc_data *rcd = NULL; - struct lnet_ping_info *pi; + struct lnet_ping_buffer *pbuf; int rc; int i; @@ -955,20 +956,20 @@ lnet_create_rc_data_locked(struct lnet_peer_ni *gateway) LNetInvalidateMDHandle(&rcd->rcd_mdh); INIT_LIST_HEAD(&rcd->rcd_list); - LIBCFS_ALLOC(pi, LNET_PINGINFO_SIZE); - if (pi == NULL) + pbuf = lnet_ping_buffer_alloc(LNET_MAX_RTR_NIS, GFP_NOFS); + if (pbuf == NULL) goto out; for (i = 0; i < LNET_MAX_RTR_NIS; i++) { - pi->pi_ni[i].ns_nid = LNET_NID_ANY; - pi->pi_ni[i].ns_status = LNET_NI_STATUS_INVALID; + pbuf->pb_info.pi_ni[i].ns_nid = LNET_NID_ANY; + pbuf->pb_info.pi_ni[i].ns_status = LNET_NI_STATUS_INVALID; } - rcd->rcd_pinginfo = pi; + rcd->rcd_pingbuffer = pbuf; LASSERT(!LNetEQHandleIsInvalid(the_lnet.ln_rc_eqh)); - rc = LNetMDBind((struct lnet_md){.start = pi, + rc = LNetMDBind((struct lnet_md){.start = &pbuf->pb_info, .user_ptr = rcd, - .length = LNET_PINGINFO_SIZE, + .length = LNET_RTR_PINGINFO_SIZE, .threshold = LNET_MD_THRESH_INF, .options = LNET_MD_TRUNCATE, .eq_handle = the_lnet.ln_rc_eqh}, @@ -1741,7 +1742,8 @@ lnet_rtrpools_enable(void) lnet_net_lock(LNET_LOCK_EX); the_lnet.ln_routing = 1; - the_lnet.ln_ping_info->pi_features &= ~LNET_PING_FEAT_RTE_DISABLED; + the_lnet.ln_ping_target->pb_info.pi_features &= + ~LNET_PING_FEAT_RTE_DISABLED; lnet_net_unlock(LNET_LOCK_EX); return rc; @@ -1755,7 +1757,8 @@ lnet_rtrpools_disable(void) lnet_net_lock(LNET_LOCK_EX); the_lnet.ln_routing = 0; - the_lnet.ln_ping_info->pi_features |= LNET_PING_FEAT_RTE_DISABLED; + the_lnet.ln_ping_target->pb_info.pi_features |= + LNET_PING_FEAT_RTE_DISABLED; tiny_router_buffers = 0; small_router_buffers = 0; -- 1.8.3.1