From e1dbfdd53e2ce95433f17ea610a5987c128e9c51 Mon Sep 17 00:00:00 2001 From: Mr NeilBrown Date: Fri, 8 May 2020 10:53:53 +1000 Subject: [PATCH] LU-10391 lnet: introduce lnet_processid for ksock_peer_ni struct lnet_processid (without the '_') is like lnet_process_id, but contains a 'struct lnet_nid' rather than lnet_nid_t. So far it is only used for ksnp_id in struct ksock_peer_ni, and related functions. Test-Parameters: trivial Test-Parameters: serverversion=2.12 serverdistro=el7.9 testlist=runtests Test-Parameters: clientversion=2.12 testlist=runtests Signed-off-by: Mr NeilBrown Change-Id: I1fea693b1c84ca4c3ac1821f55874ad11519a33b Reviewed-on: https://review.whamcloud.com/42104 Tested-by: jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Chris Horn Reviewed-by: Serguei Smirnov Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-lnet.h | 1 + lnet/include/uapi/linux/lnet/lnet-types.h | 36 ++- lnet/include/uapi/linux/lnet/nidstr.h | 1 + lnet/klnds/socklnd/socklnd.c | 280 +++++++++++----------- lnet/klnds/socklnd/socklnd.h | 10 +- lnet/klnds/socklnd/socklnd_cb.c | 373 ++++++++++++++++-------------- lnet/klnds/socklnd/socklnd_proto.c | 34 +-- lnet/lnet/api-ni.c | 19 +- lnet/lnet/nidstrings.c | 18 ++ 9 files changed, 436 insertions(+), 336 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index fbbc0d6..8fea3c7 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -524,6 +524,7 @@ extern unsigned int lnet_nid_cpt_hash(struct lnet_nid *nid, unsigned int number); extern int lnet_cpt_of_nid_locked(struct lnet_nid *nid, struct lnet_ni *ni); extern int lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni); +extern int lnet_nid2cpt(struct lnet_nid *nid, struct lnet_ni *ni); extern struct lnet_ni *lnet_nid2ni_locked(lnet_nid_t nid, int cpt); extern struct lnet_ni *lnet_nid2ni_addref(lnet_nid_t nid); extern struct lnet_ni *lnet_net2ni_locked(__u32 net, int cpt); diff --git a/lnet/include/uapi/linux/lnet/lnet-types.h b/lnet/include/uapi/linux/lnet/lnet-types.h index da5b03c..bfc4bb3 100644 --- a/lnet/include/uapi/linux/lnet/lnet-types.h +++ b/lnet/include/uapi/linux/lnet/lnet-types.h @@ -159,12 +159,14 @@ static inline int nid_same(const struct lnet_nid *n1, } /* This can be used when we need to hash a nid */ -static inline unsigned long nidhash(lnet_nid_t nid) +static inline unsigned long nidhash(const struct lnet_nid *nid) { + int i; unsigned long hash = 0; - hash ^= LNET_NIDNET(nid); - hash ^= LNET_NIDADDR(nid); + hash ^= LNET_NID_NET(nid); + for (i = 0; i < 4; i++) + hash ^= nid->nid_addr[i]; return hash; } @@ -242,6 +244,34 @@ struct lnet_process_id { /** process id */ lnet_pid_t pid; }; + +/** + * Global process ID - with large addresses + */ +struct lnet_processid { + /** node id */ + struct lnet_nid nid; + /** process id */ + lnet_pid_t pid; +}; + +static inline void +lnet_pid4_to_pid(struct lnet_process_id pid4, struct lnet_processid *pid) +{ + pid->pid = pid4.pid; + lnet_nid4_to_nid(pid4.nid, &pid->nid); +} + +static inline struct lnet_process_id +lnet_pid_to_pid4(struct lnet_processid *pid) +{ + struct lnet_process_id ret; + + ret.pid = pid->pid; + ret.nid = lnet_nid_to_nid4(&pid->nid); + return ret; +} + /** @} lnet_addr */ /** \addtogroup lnet_me diff --git a/lnet/include/uapi/linux/lnet/nidstr.h b/lnet/include/uapi/linux/lnet/nidstr.h index a24523f..57c1c33 100644 --- a/lnet/include/uapi/linux/lnet/nidstr.h +++ b/lnet/include/uapi/linux/lnet/nidstr.h @@ -97,6 +97,7 @@ int libcfs_strnid(struct lnet_nid *nid, const char *str); int libcfs_str2anynid(lnet_nid_t *nid, const char *str); int libcfs_num_parse(char *str, int len, struct list_head *list); char *libcfs_id2str(struct lnet_process_id id); +char *libcfs_idstr(struct lnet_processid *id); void cfs_free_nidlist(struct list_head *list); int cfs_parse_nidlist(char *str, int len, struct list_head *list); int cfs_print_nidlist(char *buffer, int count, struct list_head *list); diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index ad93203..4c663ea 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -152,14 +152,14 @@ ksocknal_destroy_conn_cb(struct ksock_conn_cb *conn_cb) } static struct ksock_peer_ni * -ksocknal_create_peer(struct lnet_ni *ni, struct lnet_process_id id) +ksocknal_create_peer(struct lnet_ni *ni, struct lnet_processid *id) { - int cpt = lnet_cpt_of_nid(id.nid, ni); + int cpt = lnet_nid2cpt(&id->nid, ni); struct ksock_net *net = ni->ni_data; struct ksock_peer_ni *peer_ni; - LASSERT(id.nid != LNET_NID_ANY); - LASSERT(id.pid != LNET_PID_ANY); + LASSERT(!LNET_NID_IS_ANY(&id->nid)); + LASSERT(id->pid != LNET_PID_ANY); LASSERT(!in_interrupt()); if (!atomic_inc_unless_negative(&net->ksnn_npeers)) { @@ -174,7 +174,7 @@ ksocknal_create_peer(struct lnet_ni *ni, struct lnet_process_id id) } peer_ni->ksnp_ni = ni; - peer_ni->ksnp_id = id; + peer_ni->ksnp_id = *id; refcount_set(&peer_ni->ksnp_refcount, 1); /* 1 ref for caller */ peer_ni->ksnp_closing = 0; peer_ni->ksnp_accepting = 0; @@ -197,7 +197,7 @@ ksocknal_destroy_peer(struct ksock_peer_ni *peer_ni) struct ksock_net *net = peer_ni->ksnp_ni->ni_data; CDEBUG (D_NET, "peer_ni %s %p deleted\n", - libcfs_id2str(peer_ni->ksnp_id), peer_ni); + libcfs_idstr(&peer_ni->ksnp_id), peer_ni); LASSERT(refcount_read(&peer_ni->ksnp_refcount) == 0); LASSERT(peer_ni->ksnp_accepting == 0); @@ -218,10 +218,10 @@ ksocknal_destroy_peer(struct ksock_peer_ni *peer_ni) } struct ksock_peer_ni * -ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id) +ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_processid *id) { struct ksock_peer_ni *peer_ni; - unsigned long hash = nidhash(id.nid); + unsigned long hash = nidhash(&id->nid); hash_for_each_possible(ksocknal_data.ksnd_peers, peer_ni, ksnp_list, hash) { @@ -230,12 +230,12 @@ ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id) if (peer_ni->ksnp_ni != ni) continue; - if (peer_ni->ksnp_id.nid != id.nid || - peer_ni->ksnp_id.pid != id.pid) + if (!nid_same(&peer_ni->ksnp_id.nid, &id->nid) || + peer_ni->ksnp_id.pid != id->pid) continue; CDEBUG(D_NET, "got peer_ni [%p] -> %s (%d)\n", - peer_ni, libcfs_id2str(id), + peer_ni, libcfs_idstr(id), refcount_read(&peer_ni->ksnp_refcount)); return peer_ni; } @@ -243,7 +243,7 @@ ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id) } struct ksock_peer_ni * -ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id) +ksocknal_find_peer(struct lnet_ni *ni, struct lnet_processid *id) { struct ksock_peer_ni *peer_ni; @@ -253,7 +253,7 @@ ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id) ksocknal_peer_addref(peer_ni); read_unlock(&ksocknal_data.ksnd_global_lock); - return (peer_ni); + return peer_ni; } static void @@ -312,7 +312,8 @@ ksocknal_get_peer_info(struct lnet_ni *ni, int index, if (index-- > 0) continue; - *id = peer_ni->ksnp_id; + id->pid = peer_ni->ksnp_id.pid; + id->nid = lnet_nid_to_nid4(&peer_ni->ksnp_id.nid); *myip = 0; *peer_ip = 0; *port = 0; @@ -326,7 +327,8 @@ ksocknal_get_peer_info(struct lnet_ni *ni, int index, if (index-- > 0) continue; - *id = peer_ni->ksnp_id; + id->pid = peer_ni->ksnp_id.pid; + id->nid = lnet_nid_to_nid4(&peer_ni->ksnp_id.nid); *myip = peer_ni->ksnp_passive_ips[j]; *peer_ip = 0; *port = 0; @@ -342,7 +344,8 @@ ksocknal_get_peer_info(struct lnet_ni *ni, int index, conn_cb = peer_ni->ksnp_conn_cb; - *id = peer_ni->ksnp_id; + id->pid = peer_ni->ksnp_id.pid; + id->nid = lnet_nid_to_nid4(&peer_ni->ksnp_id.nid); if (conn_cb->ksnr_addr.ss_family == AF_INET) { struct sockaddr_in *sa = (void *)&conn_cb->ksnr_addr; @@ -463,13 +466,13 @@ ksocknal_associate_cb_conn_locked(struct ksock_conn_cb *conn_cb, if (conn_cb->ksnr_myiface < 0) { /* route wasn't bound locally yet (the initial route) */ CDEBUG(D_NET, "Binding %s %pIS to interface %d\n", - libcfs_id2str(peer_ni->ksnp_id), + libcfs_idstr(&peer_ni->ksnp_id), &conn_cb->ksnr_addr, conn_iface); } else { CDEBUG(D_NET, "Rebinding %s %pIS from interface %d to %d\n", - libcfs_id2str(peer_ni->ksnp_id), + libcfs_idstr(&peer_ni->ksnp_id), &conn_cb->ksnr_addr, conn_cb->ksnr_myiface, conn_iface); @@ -565,19 +568,23 @@ ksocknal_del_conn_cb_locked(struct ksock_conn_cb *conn_cb) } int -ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, +ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id4, struct sockaddr *addr) { struct ksock_peer_ni *peer_ni; struct ksock_peer_ni *peer2; struct ksock_conn_cb *conn_cb; + struct lnet_processid id; - if (id.nid == LNET_NID_ANY || - id.pid == LNET_PID_ANY) + if (id4.nid == LNET_NID_ANY || + id4.pid == LNET_PID_ANY) return (-EINVAL); + id.pid = id4.pid; + lnet_nid4_to_nid(id4.nid, &id.nid); + /* Have a brand new peer_ni ready... */ - peer_ni = ksocknal_create_peer(ni, id); + peer_ni = ksocknal_create_peer(ni, &id); if (IS_ERR(peer_ni)) return PTR_ERR(peer_ni); @@ -593,14 +600,14 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, LASSERT(atomic_read(&((struct ksock_net *)ni->ni_data)->ksnn_npeers) >= 0); - peer2 = ksocknal_find_peer_locked(ni, id); + peer2 = ksocknal_find_peer_locked(ni, &id); if (peer2 != NULL) { ksocknal_peer_decref(peer_ni); peer_ni = peer2; } else { /* peer_ni table takes my ref on peer_ni */ hash_add(ksocknal_data.ksnd_peers, &peer_ni->ksnp_list, - nidhash(id.nid)); + nidhash(&id.nid)); } ksocknal_add_conn_cb_locked(peer_ni, conn_cb); @@ -641,7 +648,7 @@ ksocknal_del_peer_locked(struct ksock_peer_ni *peer_ni, __u32 ip) } static int -ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip) +ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id4, __u32 ip) { LIST_HEAD(zombies); struct hlist_node *pnxt; @@ -650,11 +657,15 @@ ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip) int hi; int i; int rc = -ENOENT; + struct lnet_processid id; + + id.pid = id4.pid; + lnet_nid4_to_nid(id4.nid, &id.nid); write_lock_bh(&ksocknal_data.ksnd_global_lock); - if (id.nid != LNET_NID_ANY) { - lo = hash_min(nidhash(id.nid), + if (!LNET_NID_IS_ANY(&id.nid)) { + lo = hash_min(nidhash(&id.nid), HASH_BITS(ksocknal_data.ksnd_peers)); hi = lo; } else { @@ -669,8 +680,8 @@ ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip) if (peer_ni->ksnp_ni != ni) continue; - if (!((id.nid == LNET_NID_ANY || - peer_ni->ksnp_id.nid == id.nid) && + if (!((LNET_NID_IS_ANY(&id.nid) || + nid_same(&peer_ni->ksnp_id.nid, &id.nid)) && (id.pid == LNET_PID_ANY || peer_ni->ksnp_id.pid == id.pid))) continue; @@ -800,7 +811,7 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, { rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; LIST_HEAD(zombies); - struct lnet_process_id peerid; + struct lnet_process_id peerid4; u64 incarnation; struct ksock_conn *conn; struct ksock_conn *conn2; @@ -870,7 +881,7 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, /* Active connection sends HELLO eagerly */ hello->kshm_nips = 0; - peerid = peer_ni->ksnp_id; + peerid4 = lnet_pid_to_pid4(&peer_ni->ksnp_id); write_lock_bh(global_lock); conn->ksnc_proto = peer_ni->ksnp_proto; @@ -886,32 +897,35 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, #endif } - rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); + rc = ksocknal_send_hello(ni, conn, peerid4.nid, hello); if (rc != 0) goto failed_1; } else { - peerid.nid = LNET_NID_ANY; - peerid.pid = LNET_PID_ANY; + peerid4.nid = LNET_NID_ANY; + peerid4.pid = LNET_PID_ANY; /* Passive, get protocol from peer_ni */ conn->ksnc_proto = NULL; } - rc = ksocknal_recv_hello(ni, conn, hello, &peerid, &incarnation); + rc = ksocknal_recv_hello(ni, conn, hello, &peerid4, &incarnation); if (rc < 0) goto failed_1; LASSERT(rc == 0 || active); LASSERT(conn->ksnc_proto != NULL); - LASSERT(peerid.nid != LNET_NID_ANY); + LASSERT(peerid4.nid != LNET_NID_ANY); - cpt = lnet_cpt_of_nid(peerid.nid, ni); + cpt = lnet_cpt_of_nid(peerid4.nid, ni); if (active) { ksocknal_peer_addref(peer_ni); write_lock_bh(global_lock); } else { - peer_ni = ksocknal_create_peer(ni, peerid); + struct lnet_processid peerid; + + lnet_pid4_to_pid(peerid4, &peerid); + peer_ni = ksocknal_create_peer(ni, &peerid); if (IS_ERR(peer_ni)) { rc = PTR_ERR(peer_ni); goto failed_1; @@ -922,12 +936,12 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, /* called with a ref on ni, so shutdown can't have started */ LASSERT(atomic_read(&((struct ksock_net *)ni->ni_data)->ksnn_npeers) >= 0); - peer2 = ksocknal_find_peer_locked(ni, peerid); + peer2 = ksocknal_find_peer_locked(ni, &peerid); if (peer2 == NULL) { /* NB this puts an "empty" peer_ni in the peer_ni * table (which takes my ref) */ hash_add(ksocknal_data.ksnd_peers, - &peer_ni->ksnp_list, nidhash(peerid.nid)); + &peer_ni->ksnp_list, nidhash(&peerid.nid)); } else { ksocknal_peer_decref(peer_ni); peer_ni = peer2; @@ -940,7 +954,7 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, /* Am I already connecting to this guy? Resolve in * favour of higher NID... */ - if (peerid.nid < lnet_nid_to_nid4(&ni->ni_nid) && + if (peerid4.nid < lnet_nid_to_nid4(&ni->ni_nid) && ksocknal_connecting(peer_ni->ksnp_conn_cb, ((struct sockaddr *) &conn->ksnc_peeraddr))) { rc = EALREADY; @@ -1037,7 +1051,7 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, !rpc_cmp_addr((struct sockaddr *)&conn_cb->ksnr_addr, (struct sockaddr *)&conn->ksnc_peeraddr)) { CERROR("Route %s %pIS connected to %pIS\n", - libcfs_id2str(peer_ni->ksnp_id), + libcfs_idstr(&peer_ni->ksnp_id), &conn_cb->ksnr_addr, &conn->ksnc_peeraddr); } @@ -1107,13 +1121,13 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, CDEBUG(D_NET, "New conn %s p %d.x %pIS -> %pISp" " incarnation:%lld sched[%d]\n", - libcfs_id2str(peerid), conn->ksnc_proto->pro_version, + libcfs_id2str(peerid4), conn->ksnc_proto->pro_version, &conn->ksnc_myaddr, &conn->ksnc_peeraddr, incarnation, cpt); if (!active) { hello->kshm_nips = 0; - rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); + rc = ksocknal_send_hello(ni, conn, peerid4.nid, hello); } LIBCFS_FREE(hello, offsetof(struct ksock_hello_msg, @@ -1170,10 +1184,10 @@ failed_2: if (warn != NULL) { if (rc < 0) CERROR("Not creating conn %s type %d: %s\n", - libcfs_id2str(peerid), conn->ksnc_type, warn); + libcfs_id2str(peerid4), conn->ksnc_type, warn); else CDEBUG(D_NET, "Not creating conn %s type %d: %s\n", - libcfs_id2str(peerid), conn->ksnc_type, warn); + libcfs_id2str(peerid4), conn->ksnc_type, warn); } if (!active) { @@ -1183,7 +1197,7 @@ failed_2: */ conn->ksnc_type = SOCKLND_CONN_NONE; hello->kshm_nips = 0; - ksocknal_send_hello(ni, conn, peerid.nid, hello); + ksocknal_send_hello(ni, conn, peerid4.nid, hello); } write_lock_bh(global_lock); @@ -1320,7 +1334,8 @@ ksocknal_peer_failed(struct ksock_peer_ni *peer_ni) read_unlock(&ksocknal_data.ksnd_global_lock); if (notify) - lnet_notify(peer_ni->ksnp_ni, peer_ni->ksnp_id.nid, + lnet_notify(peer_ni->ksnp_ni, + lnet_nid_to_nid4(&peer_ni->ksnp_id.nid), false, false, last_alive); } @@ -1458,9 +1473,10 @@ ksocknal_destroy_conn(struct ksock_conn *conn) last_rcv = conn->ksnc_rx_deadline - ksocknal_timeout(); CERROR("Completing partial receive from %s[%d], ip %pISp, with error, wanted: %d, left: %d, last alive is %lld secs ago\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type, + libcfs_idstr(&conn->ksnc_peer->ksnp_id), + conn->ksnc_type, &conn->ksnc_peeraddr, - conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left, + conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left, ktime_get_seconds() - last_rcv); if (conn->ksnc_lnet_msg) conn->ksnc_lnet_msg->msg_health_status = @@ -1470,31 +1486,31 @@ ksocknal_destroy_conn(struct ksock_conn *conn) case SOCKNAL_RX_LNET_HEADER: if (conn->ksnc_rx_started) CERROR("Incomplete receive of lnet header from %s, ip %pISp, with error, protocol: %d.x.\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), + libcfs_idstr(&conn->ksnc_peer->ksnp_id), &conn->ksnc_peeraddr, conn->ksnc_proto->pro_version); break; - case SOCKNAL_RX_KSM_HEADER: - if (conn->ksnc_rx_started) + case SOCKNAL_RX_KSM_HEADER: + if (conn->ksnc_rx_started) CERROR("Incomplete receive of ksock message from %s, ip %pISp, with error, protocol: %d.x.\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), + libcfs_idstr(&conn->ksnc_peer->ksnp_id), &conn->ksnc_peeraddr, conn->ksnc_proto->pro_version); - break; - case SOCKNAL_RX_SLOP: - if (conn->ksnc_rx_started) + break; + case SOCKNAL_RX_SLOP: + if (conn->ksnc_rx_started) CERROR("Incomplete receive of slops from %s, ip %pISp, with error\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), + libcfs_idstr(&conn->ksnc_peer->ksnp_id), &conn->ksnc_peeraddr); - break; - default: - LBUG (); - break; - } + break; + default: + LBUG(); + break; + } - ksocknal_peer_decref(conn->ksnc_peer); + ksocknal_peer_decref(conn->ksnc_peer); - LIBCFS_FREE (conn, sizeof (*conn)); + LIBCFS_FREE(conn, sizeof(*conn)); } int @@ -1534,7 +1550,7 @@ ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why) } int -ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr) +ksocknal_close_matching_conns(struct lnet_processid *id, __u32 ipaddr) { struct ksock_peer_ni *peer_ni; struct hlist_node *pnxt; @@ -1546,8 +1562,8 @@ ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr) write_lock_bh(&ksocknal_data.ksnd_global_lock); - if (id.nid != LNET_NID_ANY) { - lo = hash_min(nidhash(id.nid), + if (!LNET_NID_IS_ANY(&id->nid)) { + lo = hash_min(nidhash(&id->nid), HASH_BITS(ksocknal_data.ksnd_peers)); hi = lo; } else { @@ -1561,10 +1577,10 @@ ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr) &ksocknal_data.ksnd_peers[i], ksnp_list) { - if (!((id.nid == LNET_NID_ANY || - id.nid == peer_ni->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || - id.pid == peer_ni->ksnp_id.pid))) + if (!((LNET_NID_IS_ANY(&id->nid) || + nid_same(&id->nid, &peer_ni->ksnp_id.nid)) && + (id->pid == LNET_PID_ANY || + id->pid == peer_ni->ksnp_id.pid))) continue; count += ksocknal_close_peer_conns_locked( @@ -1576,7 +1592,8 @@ ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr) write_unlock_bh(&ksocknal_data.ksnd_global_lock); /* wildcards always succeed */ - if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0) + if (LNET_NID_IS_ANY(&id->nid) || id->pid == LNET_PID_ANY || + ipaddr == 0) return 0; return (count == 0 ? -ENOENT : 0); @@ -1588,15 +1605,15 @@ ksocknal_notify_gw_down(lnet_nid_t gw_nid) /* The router is telling me she's been notified of a change in * gateway state.... */ - struct lnet_process_id id = { - .nid = gw_nid, + struct lnet_processid id = { .pid = LNET_PID_ANY, }; CDEBUG(D_NET, "gw %s down\n", libcfs_nid2str(gw_nid)); + lnet_nid4_to_nid(gw_nid, &id.nid); /* If the gateway crashed, close all open connections... */ - ksocknal_close_matching_conns(id, 0); + ksocknal_close_matching_conns(&id, 0); return; /* We can only establish new connections @@ -1634,15 +1651,15 @@ ksocknal_push_peer(struct ksock_peer_ni *peer_ni) } static int -ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id) +ksocknal_push(struct lnet_ni *ni, struct lnet_processid *id) { int lo; int hi; int bkt; int rc = -ENOENT; - if (id.nid != LNET_NID_ANY) { - lo = hash_min(nidhash(id.nid), + if (!LNET_NID_IS_ANY(&id->nid)) { + lo = hash_min(nidhash(&id->nid), HASH_BITS(ksocknal_data.ksnd_peers)); hi = lo; } else { @@ -1661,10 +1678,11 @@ ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id) hlist_for_each_entry(peer_ni, &ksocknal_data.ksnd_peers[bkt], ksnp_list) { - if (!((id.nid == LNET_NID_ANY || - id.nid == peer_ni->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || - id.pid == peer_ni->ksnp_id.pid))) + if (!((LNET_NID_IS_ANY(&id->nid) || + nid_same(&id->nid, + &peer_ni->ksnp_id.nid)) && + (id->pid == LNET_PID_ANY || + id->pid == peer_ni->ksnp_id.pid))) continue; if (i++ == peer_off) { @@ -1688,7 +1706,8 @@ ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id) int ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) { - struct lnet_process_id id = {0}; + struct lnet_process_id id4 = {}; + struct lnet_processid id = {}; struct libcfs_ioctl_data *data = arg; int rc; @@ -1720,43 +1739,43 @@ ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) return rc; } - case IOC_LIBCFS_GET_PEER: { - __u32 myip = 0; - __u32 ip = 0; - int port = 0; - int conn_count = 0; - int share_count = 0; + case IOC_LIBCFS_GET_PEER: { + __u32 myip = 0; + __u32 ip = 0; + int port = 0; + int conn_count = 0; + int share_count = 0; - rc = ksocknal_get_peer_info(ni, data->ioc_count, - &id, &myip, &ip, &port, - &conn_count, &share_count); - if (rc != 0) - return rc; + rc = ksocknal_get_peer_info(ni, data->ioc_count, + &id4, &myip, &ip, &port, + &conn_count, &share_count); + if (rc != 0) + return rc; - data->ioc_nid = id.nid; - data->ioc_count = share_count; - data->ioc_u32[0] = ip; - data->ioc_u32[1] = port; - data->ioc_u32[2] = myip; - data->ioc_u32[3] = conn_count; - data->ioc_u32[4] = id.pid; - return 0; - } + data->ioc_nid = id4.nid; + data->ioc_count = share_count; + data->ioc_u32[0] = ip; + data->ioc_u32[1] = port; + data->ioc_u32[2] = myip; + data->ioc_u32[3] = conn_count; + data->ioc_u32[4] = id4.pid; + return 0; + } case IOC_LIBCFS_ADD_PEER: { struct sockaddr_in sa = {.sin_family = AF_INET}; - id.nid = data->ioc_nid; - id.pid = LNET_PID_LUSTRE; + id4.nid = data->ioc_nid; + id4.pid = LNET_PID_LUSTRE; sa.sin_addr.s_addr = htonl(data->ioc_u32[0]); sa.sin_port = htons(data->ioc_u32[1]); - return ksocknal_add_peer(ni, id, (struct sockaddr *)&sa); + return ksocknal_add_peer(ni, id4, (struct sockaddr *)&sa); } - case IOC_LIBCFS_DEL_PEER: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_del_peer (ni, id, - data->ioc_u32[0]); /* IP */ + case IOC_LIBCFS_DEL_PEER: + id4.nid = data->ioc_nid; + id4.pid = LNET_PID_ANY; + return ksocknal_del_peer(ni, id4, + data->ioc_u32[0]); /* IP */ case IOC_LIBCFS_GET_CONN: { int txmem; @@ -1771,9 +1790,9 @@ ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle); - data->ioc_count = txmem; - data->ioc_nid = conn->ksnc_peer->ksnp_id.nid; - data->ioc_flags = nagle; + data->ioc_count = txmem; + data->ioc_nid = lnet_nid_to_nid4(&conn->ksnc_peer->ksnp_id.nid); + data->ioc_flags = nagle; if (psa->sin_family == AF_INET) data->ioc_u32[0] = ntohl(psa->sin_addr.s_addr); else @@ -1792,11 +1811,11 @@ ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) return 0; } - case IOC_LIBCFS_CLOSE_CONNECTION: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_close_matching_conns (id, - data->ioc_u32[0]); + case IOC_LIBCFS_CLOSE_CONNECTION: + lnet_nid4_to_nid(data->ioc_nid, &id.nid); + id.pid = LNET_PID_ANY; + return ksocknal_close_matching_conns(&id, + data->ioc_u32[0]); case IOC_LIBCFS_REGISTER_MYNID: /* Ignore if this is a noop */ @@ -1809,15 +1828,15 @@ ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) libcfs_nidstr(&ni->ni_nid)); return -EINVAL; - case IOC_LIBCFS_PUSH_CONNECTION: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_push(ni, id); + case IOC_LIBCFS_PUSH_CONNECTION: + lnet_nid4_to_nid(data->ioc_nid, &id.nid); + id.pid = LNET_PID_ANY; + return ksocknal_push(ni, &id); - default: - return -EINVAL; - } - /* not reached */ + default: + return -EINVAL; + } + /* not reached */ } static void @@ -2124,9 +2143,8 @@ ksocknal_debug_peerhash(struct lnet_ni *ni) if (peer_ni->ksnp_ni != ni) continue; - CWARN("Active peer_ni on shutdown: %s, ref %d, " - "closing %d, accepting %d, err %d, zcookie %llu, " - "txq %d, zc_req %d\n", libcfs_id2str(peer_ni->ksnp_id), + CWARN("Active peer_ni on shutdown: %s, ref %d, closing %d, accepting %d, err %d, zcookie %llu, txq %d, zc_req %d\n", + libcfs_idstr(&peer_ni->ksnp_id), refcount_read(&peer_ni->ksnp_refcount), peer_ni->ksnp_closing, peer_ni->ksnp_accepting, peer_ni->ksnp_error, diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index ff9b901..00bacf3 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -393,7 +393,7 @@ struct ksock_conn_cb { struct ksock_peer_ni { struct hlist_node ksnp_list; /* stash on global peer_ni list */ time64_t ksnp_last_alive;/* when (in seconds) I was last alive */ - struct lnet_process_id ksnp_id; /* who's on the other end(s) */ + struct lnet_processid ksnp_id; /* who's on the other end(s) */ refcount_t ksnp_refcount; /* # users */ int ksnp_closing; /* being closed */ int ksnp_accepting; /* # passive connections pending */ @@ -597,9 +597,9 @@ int ksocknal_accept(struct lnet_ni *ni, struct socket *sock); int ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, struct sockaddr *addr); struct ksock_peer_ni *ksocknal_find_peer_locked(struct lnet_ni *ni, - struct lnet_process_id id); + struct lnet_processid *id); struct ksock_peer_ni *ksocknal_find_peer(struct lnet_ni *ni, - struct lnet_process_id id); + struct lnet_processid *id); extern void ksocknal_peer_failed(struct ksock_peer_ni *peer_ni); extern int ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, @@ -610,12 +610,12 @@ extern void ksocknal_destroy_conn(struct ksock_conn *conn); extern int ksocknal_close_peer_conns_locked(struct ksock_peer_ni *peer_ni, struct sockaddr *peer, int why); extern int ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why); -int ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr); +int ksocknal_close_matching_conns(struct lnet_processid *id, __u32 ipaddr); extern struct ksock_conn *ksocknal_find_conn_locked(struct ksock_peer_ni *peer_ni, struct ksock_tx *tx, int nonblk); extern int ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx, - struct lnet_process_id id); + struct lnet_processid *id); extern struct ksock_tx *ksocknal_alloc_tx(int type, int size); extern void ksocknal_free_tx(struct ksock_tx *tx); extern struct ksock_tx *ksocknal_alloc_tx_noop(__u64 cookie, int nonblk); diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 54fd6b6..ec33332 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -617,7 +617,7 @@ simulate_error: break; } CDEBUG(D_NET, "[%p] Error %d on write to %s ip %pISp\n", - conn, rc, libcfs_id2str(conn->ksnc_peer->ksnp_id), + conn, rc, libcfs_idstr(&conn->ksnc_peer->ksnp_id), &conn->ksnc_peeraddr); } @@ -752,7 +752,7 @@ ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn) LASSERT(!conn->ksnc_closing); CDEBUG(D_NET, "Sending to %s ip %pISp\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), + libcfs_idstr(&conn->ksnc_peer->ksnp_id), &conn->ksnc_peeraddr); ksocknal_tx_prep(conn, tx); @@ -875,7 +875,7 @@ ksocknal_find_connecting_conn_cb_locked(struct ksock_peer_ni *peer_ni) int ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx, - struct lnet_process_id id) + struct lnet_processid *id) { struct ksock_peer_ni *peer_ni; struct ksock_conn *conn; @@ -899,46 +899,52 @@ ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx, * connecting and I do have an actual * connection... */ - ksocknal_queue_tx_locked (tx, conn); + ksocknal_queue_tx_locked(tx, conn); read_unlock(g_lock); - return (0); + return 0; } } } - /* I'll need a write lock... */ + /* I'll need a write lock... */ read_unlock(g_lock); write_lock_bh(g_lock); - peer_ni = ksocknal_find_peer_locked(ni, id); - if (peer_ni != NULL) - break; + peer_ni = ksocknal_find_peer_locked(ni, id); + if (peer_ni != NULL) + break; write_unlock_bh(g_lock); - if ((id.pid & LNET_PID_USERFLAG) != 0) { - CERROR("Refusing to create a connection to " - "userspace process %s\n", libcfs_id2str(id)); - return -EHOSTUNREACH; - } + if ((id->pid & LNET_PID_USERFLAG) != 0) { + CERROR("Refusing to create a connection to userspace process %s\n", + libcfs_idstr(id)); + return -EHOSTUNREACH; + } - if (retry) { - CERROR("Can't find peer_ni %s\n", libcfs_id2str(id)); - return -EHOSTUNREACH; - } + if (retry) { + CERROR("Can't find peer_ni %s\n", libcfs_idstr(id)); + return -EHOSTUNREACH; + } memset(&sa, 0, sizeof(sa)); sa.sin_family = AF_INET; - sa.sin_addr.s_addr = htonl(LNET_NIDADDR(id.nid)); + sa.sin_addr.s_addr = id->nid.nid_addr[0]; sa.sin_port = htons(lnet_acceptor_port()); - rc = ksocknal_add_peer(ni, id, (struct sockaddr *)&sa); - if (rc != 0) { - CERROR("Can't add peer_ni %s: %d\n", - libcfs_id2str(id), rc); - return rc; - } - } + { + struct lnet_process_id id4 = { + .pid = id->pid, + .nid = lnet_nid_to_nid4(&id->nid), + }; + rc = ksocknal_add_peer(ni, id4, (struct sockaddr *)&sa); + } + if (rc != 0) { + CERROR("Can't add peer_ni %s: %d\n", + libcfs_idstr(id), rc); + return rc; + } + } ksocknal_launch_all_connections_locked(peer_ni); @@ -965,7 +971,7 @@ ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx, write_unlock_bh(g_lock); /* NB Routes may be ignored if connections to them failed recently */ - CNETERR("No usable routes to %s\n", libcfs_id2str(id)); + CNETERR("No usable routes to %s\n", libcfs_idstr(id)); tx->tx_hstatus = LNET_MSG_STATUS_REMOTE_ERROR; return (-EHOSTUNREACH); } @@ -976,7 +982,7 @@ ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg) /* '1' for consistency with code that checks !mpflag to restore */ unsigned int mpflag = 1; int type = lntmsg->msg_type; - struct lnet_process_id target = lntmsg->msg_target; + struct lnet_processid target; unsigned int payload_niov = lntmsg->msg_niov; struct bio_vec *payload_kiov = lntmsg->msg_kiov; unsigned int payload_offset = lntmsg->msg_offset; @@ -985,11 +991,14 @@ ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg) int desc_size; int rc; - /* NB 'private' is different depending on what we're sending. - * Just ignore it... */ + /* NB 'private' is different depending on what we're sending. + * Just ignore it... + */ + target.pid = lntmsg->msg_target.pid; + lnet_nid4_to_nid(lntmsg->msg_target.nid, &target.nid); - CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); + CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n", + payload_nob, payload_niov, libcfs_idstr(&target)); LASSERT (payload_nob == 0 || payload_niov > 0); LASSERT (payload_niov <= LNET_MAX_IOV); @@ -1028,7 +1037,7 @@ ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg) tx->tx_msg.ksm_zc_cookies[1] = 0; /* The first fragment will be set later in pro_pack */ - rc = ksocknal_launch_packet(ni, tx, target); + rc = ksocknal_launch_packet(ni, tx, &target); /* * We can't test lntsmg->msg_vmflush again as lntmsg may * have been freed. @@ -1036,12 +1045,12 @@ ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg) if (!mpflag) memalloc_noreclaim_restore(mpflag); - if (rc == 0) - return (0); + if (rc == 0) + return (0); lntmsg->msg_health_status = tx->tx_hstatus; - ksocknal_free_tx(tx); - return (-EIO); + ksocknal_free_tx(tx); + return -EIO; } void @@ -1138,7 +1147,7 @@ ksocknal_process_receive(struct ksock_conn *conn, struct kvec *scratch_iov) { struct lnet_hdr *lhdr; - struct lnet_process_id *id; + struct lnet_processid *id; int rc; LASSERT(refcount_read(&conn->ksnc_conn_refcount) > 0); @@ -1155,18 +1164,18 @@ ksocknal_process_receive(struct ksock_conn *conn, scratch_iov); if (rc <= 0) { - struct lnet_process_id ksnp_id; + struct lnet_processid *ksnp_id; - ksnp_id = conn->ksnc_peer->ksnp_id; + ksnp_id = &conn->ksnc_peer->ksnp_id; LASSERT(rc != -EAGAIN); if (rc == 0) CDEBUG(D_NET, "[%p] EOF from %s ip %pISp\n", - conn, libcfs_id2str(ksnp_id), + conn, libcfs_idstr(ksnp_id), &conn->ksnc_peeraddr); else if (!conn->ksnc_closing) CERROR("[%p] Error %d on read from %s ip %pISp\n", - conn, rc, libcfs_id2str(ksnp_id), + conn, rc, libcfs_idstr(ksnp_id), &conn->ksnc_peeraddr); /* it's not an error if conn is being closed */ @@ -1180,161 +1189,166 @@ ksocknal_process_receive(struct ksock_conn *conn, return (-EAGAIN); } } - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_KSM_HEADER: - if (conn->ksnc_flip) { - __swab32s(&conn->ksnc_msg.ksm_type); - __swab32s(&conn->ksnc_msg.ksm_csum); - __swab64s(&conn->ksnc_msg.ksm_zc_cookies[0]); - __swab64s(&conn->ksnc_msg.ksm_zc_cookies[1]); - } + switch (conn->ksnc_rx_state) { + case SOCKNAL_RX_KSM_HEADER: + if (conn->ksnc_flip) { + __swab32s(&conn->ksnc_msg.ksm_type); + __swab32s(&conn->ksnc_msg.ksm_csum); + __swab64s(&conn->ksnc_msg.ksm_zc_cookies[0]); + __swab64s(&conn->ksnc_msg.ksm_zc_cookies[1]); + } - if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP && - conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) { - CERROR("%s: Unknown message type: %x\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_type); - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, -EPROTO); - return (-EPROTO); - } + if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP && + conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) { + CERROR("%s: Unknown message type: %x\n", + libcfs_idstr(&conn->ksnc_peer->ksnp_id), + conn->ksnc_msg.ksm_type); + ksocknal_new_packet(conn, 0); + ksocknal_close_conn_and_siblings(conn, -EPROTO); + return (-EPROTO); + } - if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP && - conn->ksnc_msg.ksm_csum != 0 && /* has checksum */ - conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { - /* NOOP Checksum error */ - CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum); - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, -EPROTO); - return (-EIO); - } + if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP && + conn->ksnc_msg.ksm_csum != 0 && /* has checksum */ + conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { + /* NOOP Checksum error */ + CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n", + libcfs_idstr(&conn->ksnc_peer->ksnp_id), + conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum); + ksocknal_new_packet(conn, 0); + ksocknal_close_conn_and_siblings(conn, -EPROTO); + return (-EIO); + } - if (conn->ksnc_msg.ksm_zc_cookies[1] != 0) { - __u64 cookie = 0; + if (conn->ksnc_msg.ksm_zc_cookies[1] != 0) { + __u64 cookie = 0; - LASSERT (conn->ksnc_proto != &ksocknal_protocol_v1x); + LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x); - if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) - cookie = conn->ksnc_msg.ksm_zc_cookies[0]; + if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) + cookie = conn->ksnc_msg.ksm_zc_cookies[0]; - rc = conn->ksnc_proto->pro_handle_zcack(conn, cookie, - conn->ksnc_msg.ksm_zc_cookies[1]); + rc = conn->ksnc_proto->pro_handle_zcack( + conn, cookie, conn->ksnc_msg.ksm_zc_cookies[1]); - if (rc != 0) { + if (rc != 0) { CERROR("%s: Unknown ZC-ACK cookie: %llu, %llu\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - cookie, conn->ksnc_msg.ksm_zc_cookies[1]); - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, -EPROTO); - return (rc); - } - } + libcfs_idstr(&conn->ksnc_peer->ksnp_id), + cookie, + conn->ksnc_msg.ksm_zc_cookies[1]); + ksocknal_new_packet(conn, 0); + ksocknal_close_conn_and_siblings(conn, -EPROTO); + return rc; + } + } - if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) { - ksocknal_new_packet (conn, 0); - return 0; /* NOOP is done and just return */ - } + if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) { + ksocknal_new_packet(conn, 0); + return 0; /* NOOP is done and just return */ + } - conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; + conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; conn->ksnc_rx_nob_wanted = sizeof(struct ksock_lnet_msg); conn->ksnc_rx_nob_left = sizeof(struct ksock_lnet_msg); conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg.ksm_u.lnetmsg; + conn->ksnc_rx_iov[0].iov_base = + (char *)&conn->ksnc_msg.ksm_u.lnetmsg; conn->ksnc_rx_iov[0].iov_len = sizeof(struct ksock_lnet_msg); - conn->ksnc_rx_niov = 1; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; + conn->ksnc_rx_niov = 1; + conn->ksnc_rx_kiov = NULL; + conn->ksnc_rx_nkiov = 0; - goto again; /* read lnet header now */ + goto again; /* read lnet header now */ - case SOCKNAL_RX_LNET_HEADER: - /* unpack message header */ - conn->ksnc_proto->pro_unpack(&conn->ksnc_msg); + case SOCKNAL_RX_LNET_HEADER: + /* unpack message header */ + conn->ksnc_proto->pro_unpack(&conn->ksnc_msg); - if ((conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) != 0) { - /* Userspace peer_ni */ - lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr; - id = &conn->ksnc_peer->ksnp_id; + if ((conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) != 0) { + /* Userspace peer_ni */ + lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr; + id = &conn->ksnc_peer->ksnp_id; - /* Substitute process ID assigned at connection time */ - lhdr->src_pid = cpu_to_le32(id->pid); - lhdr->src_nid = cpu_to_le64(id->nid); - } + /* Substitute process ID assigned at connection time */ + lhdr->src_pid = cpu_to_le32(id->pid); + lhdr->src_nid = cpu_to_le64(lnet_nid_to_nid4(&id->nid)); + } - conn->ksnc_rx_state = SOCKNAL_RX_PARSE; - ksocknal_conn_addref(conn); /* ++ref while parsing */ - - rc = lnet_parse(conn->ksnc_peer->ksnp_ni, - &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr, - conn->ksnc_peer->ksnp_id.nid, conn, 0); - if (rc < 0) { - /* I just received garbage: give up on this conn */ - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings (conn, rc); - ksocknal_conn_decref(conn); - return (-EPROTO); - } + conn->ksnc_rx_state = SOCKNAL_RX_PARSE; + ksocknal_conn_addref(conn); /* ++ref while parsing */ - /* I'm racing with ksocknal_recv() */ - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_PARSE || - conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD); + rc = lnet_parse(conn->ksnc_peer->ksnp_ni, + &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr, + lnet_nid_to_nid4(&conn->ksnc_peer->ksnp_id.nid), + conn, 0); + if (rc < 0) { + /* I just received garbage: give up on this conn */ + ksocknal_new_packet(conn, 0); + ksocknal_close_conn_and_siblings(conn, rc); + ksocknal_conn_decref(conn); + return (-EPROTO); + } - if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD) - return 0; + /* I'm racing with ksocknal_recv() */ + LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_PARSE || + conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD); - /* ksocknal_recv() got called */ - goto again; + if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD) + return 0; - case SOCKNAL_RX_LNET_PAYLOAD: - /* payload all received */ - rc = 0; - - if (conn->ksnc_rx_nob_left == 0 && /* not truncating */ - conn->ksnc_msg.ksm_csum != 0 && /* has checksum */ - conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { - CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum); - rc = -EIO; - } + /* ksocknal_recv() got called */ + goto again; - if (rc == 0 && conn->ksnc_msg.ksm_zc_cookies[0] != 0) { - LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x); + case SOCKNAL_RX_LNET_PAYLOAD: + /* payload all received */ + rc = 0; + + if (conn->ksnc_rx_nob_left == 0 && /* not truncating */ + conn->ksnc_msg.ksm_csum != 0 && /* has checksum */ + conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { + CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n", + libcfs_idstr(&conn->ksnc_peer->ksnp_id), + conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum); + rc = -EIO; + } - lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr; - id = &conn->ksnc_peer->ksnp_id; + if (rc == 0 && conn->ksnc_msg.ksm_zc_cookies[0] != 0) { + LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x); - rc = conn->ksnc_proto->pro_handle_zcreq(conn, - conn->ksnc_msg.ksm_zc_cookies[0], - *ksocknal_tunables.ksnd_nonblk_zcack || - le64_to_cpu(lhdr->src_nid) != id->nid); - } + lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr; + id = &conn->ksnc_peer->ksnp_id; + + rc = conn->ksnc_proto->pro_handle_zcreq( + conn, + conn->ksnc_msg.ksm_zc_cookies[0], + *ksocknal_tunables.ksnd_nonblk_zcack || + le64_to_cpu(lhdr->src_nid) != + lnet_nid_to_nid4(&id->nid)); + } if (rc && conn->ksnc_lnet_msg) conn->ksnc_lnet_msg->msg_health_status = LNET_MSG_STATUS_REMOTE_ERROR; lnet_finalize(conn->ksnc_lnet_msg, rc); - if (rc != 0) { - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings (conn, rc); - return (-EPROTO); - } - /* Fall through */ + if (rc != 0) { + ksocknal_new_packet(conn, 0); + ksocknal_close_conn_and_siblings(conn, rc); + return (-EPROTO); + } + /* Fall through */ - case SOCKNAL_RX_SLOP: - /* starting new packet? */ - if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left)) - return 0; /* come back later */ - goto again; /* try to finish reading slop now */ + case SOCKNAL_RX_SLOP: + /* starting new packet? */ + if (ksocknal_new_packet(conn, conn->ksnc_rx_nob_left)) + return 0; /* come back later */ + goto again; /* try to finish reading slop now */ - default: - break; - } + default: + break; + } /* Not Reached */ LBUG (); @@ -1922,7 +1936,8 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb) if (peer_ni->ksnp_accepting > 0) { CDEBUG(D_NET, "peer_ni %s(%d) already connecting to me, retry later.\n", - libcfs_nid2str(peer_ni->ksnp_id.nid), peer_ni->ksnp_accepting); + libcfs_nidstr(&peer_ni->ksnp_id.nid), + peer_ni->ksnp_accepting); retry_later = true; } @@ -1945,13 +1960,13 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb) if (ktime_get_seconds() >= deadline) { rc = -ETIMEDOUT; - lnet_connect_console_error(rc, peer_ni->ksnp_id.nid, - (struct sockaddr *) - &conn_cb->ksnr_addr); + lnet_connect_console_error( + rc, lnet_nid_to_nid4(&peer_ni->ksnp_id.nid), + (struct sockaddr *)&conn_cb->ksnr_addr); goto failed; } - sock = lnet_connect(peer_ni->ksnp_id.nid, + sock = lnet_connect(lnet_nid_to_nid4(&peer_ni->ksnp_id.nid), conn_cb->ksnr_myiface, (struct sockaddr *)&conn_cb->ksnr_addr, peer_ni->ksnp_ni->ni_net_ns); @@ -1963,18 +1978,19 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb) rc = ksocknal_create_conn(peer_ni->ksnp_ni, conn_cb, sock, type); if (rc < 0) { - lnet_connect_console_error(rc, peer_ni->ksnp_id.nid, - (struct sockaddr *) - &conn_cb->ksnr_addr); + lnet_connect_console_error( + rc, lnet_nid_to_nid4(&peer_ni->ksnp_id.nid), + (struct sockaddr *)&conn_cb->ksnr_addr); goto failed; } /* A +ve RC means I have to retry because I lost the connection - * race or I have to renegotiate protocol version */ + * race or I have to renegotiate protocol version + */ retry_later = (rc != 0); if (retry_later) CDEBUG(D_NET, "peer_ni %s: conn race, retry later.\n", - libcfs_nid2str(peer_ni->ksnp_id.nid)); + libcfs_nidstr(&peer_ni->ksnp_id.nid)); write_lock_bh(&ksocknal_data.ksnd_global_lock); } @@ -2307,18 +2323,18 @@ ksocknal_find_timed_out_conn(struct ksock_peer_ni *peer_ni) switch (error) { case ECONNRESET: CNETERR("A connection with %s (%pISp) was reset; it may have rebooted.\n", - libcfs_id2str(peer_ni->ksnp_id), + libcfs_idstr(&peer_ni->ksnp_id), &conn->ksnc_peeraddr); break; case ETIMEDOUT: CNETERR("A connection with %s (%pISp) timed out; the network or node may be down.\n", - libcfs_id2str(peer_ni->ksnp_id), + libcfs_idstr(&peer_ni->ksnp_id), &conn->ksnc_peeraddr); break; default: CNETERR("An unexpected network error %d occurred with %s (%pISp\n", error, - libcfs_id2str(peer_ni->ksnp_id), + libcfs_idstr(&peer_ni->ksnp_id), &conn->ksnc_peeraddr); break; } @@ -2331,7 +2347,7 @@ ksocknal_find_timed_out_conn(struct ksock_peer_ni *peer_ni) /* Timed out incomplete incoming message */ ksocknal_conn_addref(conn); CNETERR("Timeout receiving from %s (%pISp), state %d wanted %d left %d\n", - libcfs_id2str(peer_ni->ksnp_id), + libcfs_idstr(&peer_ni->ksnp_id), &conn->ksnc_peeraddr, conn->ksnc_rx_state, conn->ksnc_rx_nob_wanted, @@ -2351,7 +2367,7 @@ ksocknal_find_timed_out_conn(struct ksock_peer_ni *peer_ni) tx->tx_hstatus = LNET_MSG_STATUS_LOCAL_TIMEOUT; CNETERR("Timeout sending data to %s (%pISp) the network or that node may be down.\n", - libcfs_id2str(peer_ni->ksnp_id), + libcfs_idstr(&peer_ni->ksnp_id), &conn->ksnc_peeraddr); return conn; } @@ -2434,7 +2450,8 @@ __must_hold(&ksocknal_data.ksnd_global_lock) return -ENOMEM; } - if (ksocknal_launch_packet(peer_ni->ksnp_ni, tx, peer_ni->ksnp_id) == 0) { + if (ksocknal_launch_packet(peer_ni->ksnp_ni, tx, &peer_ni->ksnp_id) + == 0) { read_lock(&ksocknal_data.ksnd_global_lock); return 1; } @@ -2534,7 +2551,7 @@ ksocknal_check_peer_timeouts(int idx) CERROR("Total %d stale ZC_REQs for peer_ni %s detected; the " "oldest(%p) timed out %lld secs ago, " "resid: %d, wmem: %d\n", - n, libcfs_nid2str(peer_ni->ksnp_id.nid), tx_stale, + n, libcfs_nidstr(&peer_ni->ksnp_id.nid), tx_stale, ktime_get_seconds() - deadline, resid, conn->ksnc_sock->sk->sk_wmem_queued); diff --git a/lnet/klnds/socklnd/socklnd_proto.c b/lnet/klnds/socklnd/socklnd_proto.c index 533e8bb..d931ae8 100644 --- a/lnet/klnds/socklnd/socklnd_proto.c +++ b/lnet/klnds/socklnd/socklnd_proto.c @@ -188,12 +188,12 @@ ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn, return 1; } - if (cookie == tx->tx_msg.ksm_zc_cookies[0] || - cookie == tx->tx_msg.ksm_zc_cookies[1]) { + if (cookie == tx->tx_msg.ksm_zc_cookies[0] || + cookie == tx->tx_msg.ksm_zc_cookies[1]) { CWARN("%s: duplicated ZC cookie: %llu\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie); - return 1; /* XXX return error in the future */ - } + libcfs_idstr(&conn->ksnc_peer->ksnp_id), cookie); + return 1; /* XXX return error in the future */ + } if (tx->tx_msg.ksm_zc_cookies[0] == 0) { /* NOOP tx has only one ZC-ACK cookie, can carry at least one more */ @@ -239,13 +239,16 @@ ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn, } } else { - /* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is range of cookies */ - if (cookie >= tx->tx_msg.ksm_zc_cookies[0] && - cookie <= tx->tx_msg.ksm_zc_cookies[1]) { + /* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is a range + * of cookies + */ + if (cookie >= tx->tx_msg.ksm_zc_cookies[0] && + cookie <= tx->tx_msg.ksm_zc_cookies[1]) { CWARN("%s: duplicated ZC cookie: %llu\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie); - return 1; /* XXX: return error in the future */ - } + libcfs_idstr(&conn->ksnc_peer->ksnp_id), + cookie); + return 1; /* XXX: return error in the future */ + } if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) { tx->tx_msg.ksm_zc_cookies[1] = cookie; @@ -391,11 +394,12 @@ ksocknal_handle_zcreq(struct ksock_conn *c, __u64 cookie, int remote) if (tx == NULL) return -ENOMEM; - if ((rc = ksocknal_launch_packet(peer_ni->ksnp_ni, tx, peer_ni->ksnp_id)) == 0) - return 0; + rc = ksocknal_launch_packet(peer_ni->ksnp_ni, tx, &peer_ni->ksnp_id); + if (rc == 0) + return 0; - ksocknal_free_tx(tx); - return rc; + ksocknal_free_tx(tx); + return rc; } /* (Sender) handle ZC_ACK from sink */ diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 9f67e03..9908917 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -1584,24 +1584,35 @@ lnet_cpt_of_nid_locked(struct lnet_nid *nid, struct lnet_ni *ni) } int -lnet_cpt_of_nid(lnet_nid_t nid4, struct lnet_ni *ni) +lnet_nid2cpt(struct lnet_nid *nid, struct lnet_ni *ni) { int cpt; int cpt2; - struct lnet_nid nid; if (LNET_CPT_NUMBER == 1) return 0; /* the only one */ - lnet_nid4_to_nid(nid4, &nid); cpt = lnet_net_lock_current(); - cpt2 = lnet_cpt_of_nid_locked(&nid, ni); + cpt2 = lnet_cpt_of_nid_locked(nid, ni); lnet_net_unlock(cpt); return cpt2; } +EXPORT_SYMBOL(lnet_nid2cpt); + +int +lnet_cpt_of_nid(lnet_nid_t nid4, struct lnet_ni *ni) +{ + struct lnet_nid nid; + + if (LNET_CPT_NUMBER == 1) + return 0; /* the only one */ + + lnet_nid4_to_nid(nid4, &nid); + return lnet_nid2cpt(&nid, ni); +} EXPORT_SYMBOL(lnet_cpt_of_nid); int diff --git a/lnet/lnet/nidstrings.c b/lnet/lnet/nidstrings.c index c3df95d..84d6cec 100644 --- a/lnet/lnet/nidstrings.c +++ b/lnet/lnet/nidstrings.c @@ -1145,6 +1145,24 @@ libcfs_id2str(struct lnet_process_id id) } EXPORT_SYMBOL(libcfs_id2str); +char * +libcfs_idstr(struct lnet_processid *id) +{ + char *str = libcfs_next_nidstring(); + + if (id->pid == LNET_PID_ANY) { + snprintf(str, LNET_NIDSTR_SIZE, + "LNET_PID_ANY-%s", libcfs_nidstr(&id->nid)); + return str; + } + + snprintf(str, LNET_NIDSTR_SIZE, "%s%u-%s", + ((id->pid & LNET_PID_USERFLAG) != 0) ? "U" : "", + (id->pid & ~LNET_PID_USERFLAG), libcfs_nidstr(&id->nid)); + return str; +} +EXPORT_SYMBOL(libcfs_idstr); + int libcfs_str2anynid(lnet_nid_t *nidp, const char *str) { -- 1.8.3.1