X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Fklnds%2Fsocklnd%2Fsocklnd_proto.c;h=e24f0b4523862185618fc78580654ae09932c4a2;hp=c60383e08f56ab3642c7939f79103ba144d4a2cd;hb=HEAD;hpb=9fb46705ae86aa2c0ac29427f0ff24f923560eb7 diff --git a/lnet/klnds/socklnd/socklnd_proto.c b/lnet/klnds/socklnd/socklnd_proto.c index c60383e..b90c3d2 100644 --- a/lnet/klnds/socklnd/socklnd_proto.c +++ b/lnet/klnds/socklnd/socklnd_proto.c @@ -1,12 +1,14 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * + * Copyright (c) 2012, 2017, Intel Corporation. + * * Author: Zach Brown * Author: Peter J. Braam * Author: Phil Schwan * Author: Eric Barton * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ + * This file is part of Lustre, https://wiki.whamcloud.com/ * * Portals is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -39,39 +41,39 @@ * pro_match_tx() : Called holding glock */ -static ksock_tx_t * -ksocknal_queue_tx_msg_v1(ksock_conn_t *conn, ksock_tx_t *tx_msg) +static struct ksock_tx * +ksocknal_queue_tx_msg_v1(struct ksock_conn *conn, struct ksock_tx *tx_msg) { /* V1.x, just enqueue it */ - cfs_list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); + list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); return NULL; } void -ksocknal_next_tx_carrier(ksock_conn_t *conn) +ksocknal_next_tx_carrier(struct ksock_conn *conn) { - ksock_tx_t *tx = conn->ksnc_tx_carrier; + struct ksock_tx *tx = conn->ksnc_tx_carrier; /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */ - LASSERT (!cfs_list_empty(&conn->ksnc_tx_queue)); - LASSERT (tx != NULL); + LASSERT(!list_empty(&conn->ksnc_tx_queue)); + LASSERT(tx != NULL); /* Next TX that can carry ZC-ACK or LNet message */ if (tx->tx_list.next == &conn->ksnc_tx_queue) { /* no more packets queued */ conn->ksnc_tx_carrier = NULL; } else { - conn->ksnc_tx_carrier = cfs_list_entry(tx->tx_list.next, - ksock_tx_t, tx_list); - LASSERT (conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type); + conn->ksnc_tx_carrier = list_next_entry(tx, tx_list); + LASSERT(conn->ksnc_tx_carrier->tx_msg.ksm_type == + tx->tx_msg.ksm_type); } } static int -ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn, - ksock_tx_t *tx_ack, __u64 cookie) +ksocknal_queue_tx_zcack_v2(struct ksock_conn *conn, + struct ksock_tx *tx_ack, __u64 cookie) { - ksock_tx_t *tx = conn->ksnc_tx_carrier; + struct ksock_tx *tx = conn->ksnc_tx_carrier; LASSERT (tx_ack == NULL || tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP); @@ -85,7 +87,7 @@ ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn, */ if (tx == NULL) { if (tx_ack != NULL) { - cfs_list_add_tail(&tx_ack->tx_list, + list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue); conn->ksnc_tx_carrier = tx_ack; } @@ -95,7 +97,7 @@ ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn, if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) { /* tx is noop zc-ack, can't piggyback zc-ack cookie */ if (tx_ack != NULL) - cfs_list_add_tail(&tx_ack->tx_list, + list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue); return 0; } @@ -114,10 +116,10 @@ ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn, return 1; } -static ksock_tx_t * -ksocknal_queue_tx_msg_v2(ksock_conn_t *conn, ksock_tx_t *tx_msg) +static struct ksock_tx * +ksocknal_queue_tx_msg_v2(struct ksock_conn *conn, struct ksock_tx *tx_msg) { - ksock_tx_t *tx = conn->ksnc_tx_carrier; + struct ksock_tx *tx = conn->ksnc_tx_carrier; /* * Enqueue tx_msg: @@ -127,13 +129,13 @@ ksocknal_queue_tx_msg_v2(ksock_conn_t *conn, ksock_tx_t *tx_msg) * and replace the NOOP tx, and return the NOOP tx. */ if (tx == NULL) { /* nothing on queue */ - cfs_list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); + list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); conn->ksnc_tx_carrier = tx_msg; return NULL; } if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */ - cfs_list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); + list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); return NULL; } @@ -143,18 +145,17 @@ ksocknal_queue_tx_msg_v2(ksock_conn_t *conn, ksock_tx_t *tx_msg) tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1]; ksocknal_next_tx_carrier(conn); - /* use new_tx to replace the noop zc-ack packet */ - cfs_list_add(&tx_msg->tx_list, &tx->tx_list); - cfs_list_del(&tx->tx_list); + /* use new_tx to replace the noop zc-ack packet */ + list_splice(&tx->tx_list, &tx_msg->tx_list); - return tx; + return tx; } static int -ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, - ksock_tx_t *tx_ack, __u64 cookie) +ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn, + struct ksock_tx *tx_ack, __u64 cookie) { - ksock_tx_t *tx; + struct ksock_tx *tx; if (conn->ksnc_type != SOCKLND_CONN_ACK) return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie); @@ -165,7 +166,7 @@ ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, if ((tx = conn->ksnc_tx_carrier) == NULL) { if (tx_ack != NULL) { - cfs_list_add_tail(&tx_ack->tx_list, + list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue); conn->ksnc_tx_carrier = tx_ack; } @@ -187,12 +188,12 @@ ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, return 1; } - if (cookie == tx->tx_msg.ksm_zc_cookies[0] || - cookie == tx->tx_msg.ksm_zc_cookies[1]) { - CWARN("%s: duplicated ZC cookie: "LPU64"\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie); - return 1; /* XXX return error in the future */ - } + if (cookie == tx->tx_msg.ksm_zc_cookies[0] || + cookie == tx->tx_msg.ksm_zc_cookies[1]) { + CWARN("%s: duplicated ZC cookie: %llu\n", + libcfs_idstr(&conn->ksnc_peer->ksnp_id), cookie); + return 1; /* XXX return error in the future */ + } if (tx->tx_msg.ksm_zc_cookies[0] == 0) { /* NOOP tx has only one ZC-ACK cookie, can carry at least one more */ @@ -216,7 +217,7 @@ ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) { __u64 tmp = 0; - /* two seperated cookies: (a+2, a) or (a+1, a) */ + /* two separated cookies: (a+2, a) or (a+1, a) */ LASSERT (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] <= 2); @@ -238,13 +239,16 @@ ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, } } else { - /* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is range of cookies */ - if (cookie >= tx->tx_msg.ksm_zc_cookies[0] && - cookie <= tx->tx_msg.ksm_zc_cookies[1]) { - CWARN("%s: duplicated ZC cookie: "LPU64"\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie); - return 1; /* XXX: return error in the future */ - } + /* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is a range + * of cookies + */ + if (cookie >= tx->tx_msg.ksm_zc_cookies[0] && + cookie <= tx->tx_msg.ksm_zc_cookies[1]) { + CWARN("%s: duplicated ZC cookie: %llu\n", + libcfs_idstr(&conn->ksnc_peer->ksnp_id), + cookie); + return 1; /* XXX: return error in the future */ + } if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) { tx->tx_msg.ksm_zc_cookies[1] = cookie; @@ -259,7 +263,7 @@ ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, /* failed to piggyback ZC-ACK */ if (tx_ack != NULL) { - cfs_list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue); + list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue); /* the next tx can piggyback at least 1 ACK */ ksocknal_next_tx_carrier(conn); } @@ -268,7 +272,7 @@ ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, } static int -ksocknal_match_tx(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk) +ksocknal_match_tx(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk) { int nob; @@ -277,14 +281,15 @@ ksocknal_match_tx(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk) return SOCKNAL_MATCH_YES; #endif - if (tx == NULL || tx->tx_lnetmsg == NULL) { - /* noop packet */ - nob = offsetof(ksock_msg_t, ksm_u); - } else { - nob = tx->tx_lnetmsg->msg_len + - ((conn->ksnc_proto == &ksocknal_protocol_v1x) ? - sizeof(lnet_hdr_t) : sizeof(ksock_msg_t)); - } + if (tx == NULL || tx->tx_lnetmsg == NULL) { + /* noop packet */ + nob = sizeof(struct ksock_msg_hdr); + } else { + nob = tx->tx_lnetmsg->msg_len + + ((conn->ksnc_proto == &ksocknal_protocol_v1x) ? + 0 : sizeof(struct ksock_msg_hdr)) + + sizeof(struct lnet_hdr_nid4); + } /* default checking for typed connection */ switch (conn->ksnc_type) { @@ -312,14 +317,16 @@ ksocknal_match_tx(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk) } static int -ksocknal_match_tx_v3(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk) +ksocknal_match_tx_v3(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk) { int nob; - if (tx == NULL || tx->tx_lnetmsg == NULL) - nob = offsetof(ksock_msg_t, ksm_u); - else - nob = tx->tx_lnetmsg->msg_len + sizeof(ksock_msg_t); + if (tx == NULL || tx->tx_lnetmsg == NULL) + nob = sizeof(struct ksock_msg_hdr); + else + nob = sizeof(struct ksock_msg_hdr) + + sizeof(struct lnet_hdr_nid4) + + tx->tx_lnetmsg->msg_len; switch (conn->ksnc_type) { default: @@ -354,20 +361,65 @@ ksocknal_match_tx_v3(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk) } } +static int +ksocknal_match_tx_v4(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk) +{ + int nob; + + if (!tx || !tx->tx_lnetmsg) + nob = sizeof(struct ksock_msg_hdr); + else + nob = sizeof(struct ksock_msg_hdr) + + sizeof(struct lnet_hdr_nid16) + + tx->tx_lnetmsg->msg_len; + + switch (conn->ksnc_type) { + default: + CERROR("ksnc_type bad: %u\n", conn->ksnc_type); + LBUG(); + case SOCKLND_CONN_ANY: + return SOCKNAL_MATCH_NO; + + case SOCKLND_CONN_ACK: + if (nonblk) + return SOCKNAL_MATCH_YES; + else if (tx == NULL || tx->tx_lnetmsg == NULL) + return SOCKNAL_MATCH_MAY; + else + return SOCKNAL_MATCH_NO; + + case SOCKLND_CONN_BULK_OUT: + if (nonblk) + return SOCKNAL_MATCH_NO; + else if (nob < *ksocknal_tunables.ksnd_min_bulk) + return SOCKNAL_MATCH_MAY; + else + return SOCKNAL_MATCH_YES; + + case SOCKLND_CONN_CONTROL: + if (nonblk) + return SOCKNAL_MATCH_NO; + else if (nob >= *ksocknal_tunables.ksnd_min_bulk) + return SOCKNAL_MATCH_MAY; + else + return SOCKNAL_MATCH_YES; + } +} + /* (Sink) handle incoming ZC request from sender */ static int -ksocknal_handle_zcreq(ksock_conn_t *c, __u64 cookie, int remote) +ksocknal_handle_zcreq(struct ksock_conn *c, __u64 cookie, int remote) { - ksock_peer_t *peer = c->ksnc_peer; - ksock_conn_t *conn; - ksock_tx_t *tx; - int rc; + struct ksock_peer_ni *peer_ni = c->ksnc_peer; + struct ksock_conn *conn; + struct ksock_tx *tx; + int rc; read_lock(&ksocknal_data.ksnd_global_lock); - conn = ksocknal_find_conn_locked(peer, NULL, !!remote); + conn = ksocknal_find_conn_locked(peer_ni, NULL, !!remote); if (conn != NULL) { - ksock_sched_t *sched = conn->ksnc_scheduler; + struct ksock_sched *sched = conn->ksnc_scheduler; LASSERT(conn->ksnc_proto->pro_queue_tx_zcack != NULL); @@ -390,55 +442,56 @@ ksocknal_handle_zcreq(ksock_conn_t *c, __u64 cookie, int remote) if (tx == NULL) return -ENOMEM; - if ((rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id)) == 0) - return 0; + rc = ksocknal_launch_packet(peer_ni->ksnp_ni, tx, &peer_ni->ksnp_id); + if (rc == 0) + return 0; - ksocknal_free_tx(tx); - return rc; + ksocknal_free_tx(tx); + return rc; } /* (Sender) handle ZC_ACK from sink */ static int -ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2) +ksocknal_handle_zcack(struct ksock_conn *conn, __u64 cookie1, __u64 cookie2) { - ksock_peer_t *peer = conn->ksnc_peer; - ksock_tx_t *tx; - ksock_tx_t *tmp; - CFS_LIST_HEAD (zlist); - int count; + struct ksock_peer_ni *peer_ni = conn->ksnc_peer; + struct ksock_tx *tx; + struct ksock_tx *tmp; + LIST_HEAD(zlist); + int count; if (cookie1 == 0) cookie1 = cookie2; count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1); - if (cookie2 == SOCKNAL_KEEPALIVE_PING && - conn->ksnc_proto == &ksocknal_protocol_v3x) { - /* keepalive PING for V3.x, just ignore it */ - return count == 1 ? 0 : -EPROTO; - } + if (cookie2 == SOCKNAL_KEEPALIVE_PING && + (conn->ksnc_proto == &ksocknal_protocol_v3x || + conn->ksnc_proto == &ksocknal_protocol_v4x)) { + /* keepalive PING for V3.x, just ignore it */ + return count == 1 ? 0 : -EPROTO; + } - spin_lock(&peer->ksnp_lock); + spin_lock(&peer_ni->ksnp_lock); - cfs_list_for_each_entry_safe(tx, tmp, - &peer->ksnp_zc_req_list, tx_zc_list) { + list_for_each_entry_safe(tx, tmp, &peer_ni->ksnp_zc_req_list, + tx_zc_list) { __u64 c = tx->tx_msg.ksm_zc_cookies[0]; if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) { tx->tx_msg.ksm_zc_cookies[0] = 0; - cfs_list_del(&tx->tx_zc_list); - cfs_list_add(&tx->tx_zc_list, &zlist); + list_move(&tx->tx_zc_list, &zlist); if (--count == 0) break; } } - spin_unlock(&peer->ksnp_lock); + spin_unlock(&peer_ni->ksnp_lock); - while (!cfs_list_empty(&zlist)) { - tx = cfs_list_entry(zlist.next, ksock_tx_t, tx_zc_list); - cfs_list_del(&tx->tx_zc_list); + while ((tx = list_first_entry_or_null(&zlist, struct ksock_tx, + tx_zc_list)) != NULL) { + list_del(&tx->tx_zc_list); ksocknal_tx_decref(tx); } @@ -446,313 +499,453 @@ ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2) } static int -ksocknal_send_hello_v1 (ksock_conn_t *conn, ksock_hello_msg_t *hello) +ksocknal_send_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello) { - cfs_socket_t *sock = conn->ksnc_sock; - lnet_hdr_t *hdr; - lnet_magicversion_t *hmv; - int rc; - int i; - - CLASSERT(sizeof(lnet_magicversion_t) == offsetof(lnet_hdr_t, src_nid)); - - LIBCFS_ALLOC(hdr, sizeof(*hdr)); - if (hdr == NULL) { - CERROR("Can't allocate lnet_hdr_t\n"); - return -ENOMEM; - } - - hmv = (lnet_magicversion_t *)&hdr->dest_nid; + struct socket *sock = conn->ksnc_sock; + struct _lnet_hdr_nid4 *hdr; + struct lnet_magicversion *hmv; + int rc; + int i; + + BUILD_BUG_ON(sizeof(struct lnet_magicversion) != + offsetof(struct _lnet_hdr_nid4, src_nid)); + + LIBCFS_ALLOC(hdr, sizeof(*hdr)); + if (hdr == NULL) { + CERROR("Can't allocate struct lnet_hdr_nid4\n"); + return -ENOMEM; + } - /* Re-organize V2.x message header to V1.x (lnet_hdr_t) - * header and send out */ - hmv->magic = cpu_to_le32 (LNET_PROTO_TCP_MAGIC); - hmv->version_major = cpu_to_le16 (KSOCK_PROTO_V1_MAJOR); - hmv->version_minor = cpu_to_le16 (KSOCK_PROTO_V1_MINOR); + hmv = (struct lnet_magicversion *)&hdr->dest_nid; - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto check */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - hmv->version_major++; /* just different! */ - the_lnet.ln_testprotocompat &= ~1; - } - if ((the_lnet.ln_testprotocompat & 2) != 0) { - hmv->magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } + /* Re-organize V2.x message header to V1.x (struct lnet_hdr_nid4) + * header and send out + */ + hmv->magic = cpu_to_le32 (LNET_PROTO_TCP_MAGIC); + hmv->version_major = cpu_to_le16 (KSOCK_PROTO_V1_MAJOR); + hmv->version_minor = cpu_to_le16 (KSOCK_PROTO_V1_MINOR); - hdr->src_nid = cpu_to_le64 (hello->kshm_src_nid); - hdr->src_pid = cpu_to_le32 (hello->kshm_src_pid); - hdr->type = cpu_to_le32 (LNET_MSG_HELLO); - hdr->payload_length = cpu_to_le32 (hello->kshm_nips * sizeof(__u32)); - hdr->msg.hello.type = cpu_to_le32 (hello->kshm_ctype); - hdr->msg.hello.incarnation = cpu_to_le64 (hello->kshm_src_incarnation); + if (the_lnet.ln_testprotocompat) { + /* single-shot proto check */ + if (test_and_clear_bit(0, &the_lnet.ln_testprotocompat)) + hmv->version_major++; /* just different! */ - rc = libcfs_sock_write(sock, hdr, sizeof(*hdr),lnet_acceptor_timeout()); + if (test_and_clear_bit(1, &the_lnet.ln_testprotocompat)) + hmv->magic = LNET_PROTO_MAGIC; + } - if (rc != 0) { - CNETERR("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n", - rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - goto out; - } + hdr->src_nid = cpu_to_le64(lnet_nid_to_nid4(&hello->kshm_src_nid)); + hdr->src_pid = cpu_to_le32 (hello->kshm_src_pid); + hdr->type = cpu_to_le32 (LNET_MSG_HELLO); + hdr->payload_length = cpu_to_le32 (hello->kshm_nips * sizeof(__u32)); + hdr->msg.hello.type = cpu_to_le32 (hello->kshm_ctype); + hdr->msg.hello.incarnation = cpu_to_le64 (hello->kshm_src_incarnation); + + rc = lnet_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout()); + if (rc != 0) { + CNETERR("Error %d sending HELLO hdr to %pIScp\n", + rc, &conn->ksnc_peeraddr); + goto out; + } - if (hello->kshm_nips == 0) - goto out; + if (hello->kshm_nips == 0) + goto out; - for (i = 0; i < (int) hello->kshm_nips; i++) { - hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]); - } + for (i = 0; i < (int) hello->kshm_nips; i++) + hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]); - rc = libcfs_sock_write(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), - lnet_acceptor_timeout()); - if (rc != 0) { - CNETERR("Error %d sending HELLO payload (%d)" - " to %u.%u.%u.%u/%d\n", rc, hello->kshm_nips, - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - } + rc = lnet_sock_write(sock, hello->kshm_ips, + hello->kshm_nips * sizeof(__u32), + lnet_acceptor_timeout()); + if (rc != 0) { + CNETERR("Error %d sending HELLO payload (%d) to %pIScp\n", + rc, hello->kshm_nips, + &conn->ksnc_peeraddr); + } out: - LIBCFS_FREE(hdr, sizeof(*hdr)); + LIBCFS_FREE(hdr, sizeof(*hdr)); - return rc; + return rc; } static int -ksocknal_send_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello) +ksocknal_send_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello) { - cfs_socket_t *sock = conn->ksnc_sock; - int rc; - - hello->kshm_magic = LNET_PROTO_MAGIC; - hello->kshm_version = conn->ksnc_proto->pro_version; - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto check */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - hello->kshm_version++; /* just different! */ - the_lnet.ln_testprotocompat &= ~1; - } - LNET_UNLOCK(); - } + struct socket *sock = conn->ksnc_sock; + int rc; + struct ksock_hello_msg_nid4 *hello4; + + CFS_ALLOC_PTR(hello4); + if (!hello4) { + CERROR("Can't allocate struct ksock_hello_msg_nid4\n"); + return -ENOMEM; + } + + hello->kshm_magic = LNET_PROTO_MAGIC; + hello->kshm_version = conn->ksnc_proto->pro_version; + + hello4->kshm_magic = LNET_PROTO_MAGIC; + hello4->kshm_version = conn->ksnc_proto->pro_version; + hello4->kshm_src_nid = lnet_nid_to_nid4(&hello->kshm_src_nid); + hello4->kshm_dst_nid = lnet_nid_to_nid4(&hello->kshm_dst_nid); + hello4->kshm_src_pid = hello->kshm_src_pid; + hello4->kshm_dst_pid = hello->kshm_dst_pid; + hello4->kshm_src_incarnation = hello->kshm_src_incarnation; + hello4->kshm_dst_incarnation = hello->kshm_dst_incarnation; + hello4->kshm_ctype = hello->kshm_ctype; + hello4->kshm_nips = hello->kshm_nips; + + if (the_lnet.ln_testprotocompat) { + /* single-shot proto check */ + if (test_and_clear_bit(0, &the_lnet.ln_testprotocompat)) + hello->kshm_version++; /* just different! */ + } + hello4->kshm_magic = LNET_PROTO_MAGIC; + hello4->kshm_version = hello->kshm_version; + hello4->kshm_src_nid = lnet_nid_to_nid4(&hello->kshm_src_nid); + hello4->kshm_dst_nid = lnet_nid_to_nid4(&hello->kshm_dst_nid); + hello4->kshm_src_pid = hello->kshm_src_pid; + hello4->kshm_dst_pid = hello->kshm_dst_pid; + hello4->kshm_src_incarnation = hello->kshm_src_incarnation; + hello4->kshm_dst_incarnation = hello->kshm_dst_incarnation; + hello4->kshm_ctype = hello->kshm_ctype; + hello4->kshm_nips = hello->kshm_nips; + + rc = lnet_sock_write(sock, hello4, sizeof(*hello4), + lnet_acceptor_timeout()); + CFS_FREE_PTR(hello4); + if (rc) { + CNETERR("Error %d sending HELLO hdr to %pIScp\n", + rc, &conn->ksnc_peeraddr); + return rc; + } - rc = libcfs_sock_write(sock, hello, offsetof(ksock_hello_msg_t, kshm_ips), - lnet_acceptor_timeout()); + if (hello->kshm_nips == 0) + return 0; - if (rc != 0) { - CNETERR("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n", - rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - return rc; - } + rc = lnet_sock_write(sock, hello->kshm_ips, + hello->kshm_nips * sizeof(__u32), + lnet_acceptor_timeout()); + if (rc != 0) { + CNETERR("Error %d sending HELLO payload (%d) to %pIScp\n", rc, + hello->kshm_nips, + &conn->ksnc_peeraddr); + } - if (hello->kshm_nips == 0) - return 0; + return rc; +} - rc = libcfs_sock_write(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), - lnet_acceptor_timeout()); - if (rc != 0) { - CNETERR("Error %d sending HELLO payload (%d)" - " to %u.%u.%u.%u/%d\n", rc, hello->kshm_nips, - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - } +static int +ksocknal_send_hello_v4(struct ksock_conn *conn, struct ksock_hello_msg *hello) +{ + struct socket *sock = conn->ksnc_sock; + int rc; + + hello->kshm_magic = LNET_PROTO_MAGIC; + hello->kshm_version = conn->ksnc_proto->pro_version; - return rc; + rc = lnet_sock_write(sock, hello, sizeof(*hello), + lnet_acceptor_timeout()); + + if (rc != 0) + CNETERR("Error %d sending HELLO hdr to %pIScp\n", + rc, &conn->ksnc_peeraddr); + return rc; } static int -ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello,int timeout) +ksocknal_recv_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello, + int timeout) { - cfs_socket_t *sock = conn->ksnc_sock; - lnet_hdr_t *hdr; - int rc; - int i; - - LIBCFS_ALLOC(hdr, sizeof(*hdr)); - if (hdr == NULL) { - CERROR("Can't allocate lnet_hdr_t\n"); - return -ENOMEM; - } + struct socket *sock = conn->ksnc_sock; + struct _lnet_hdr_nid4 *hdr; + int rc; + int i; + + CFS_ALLOC_PTR(hdr); + if (!hdr) { + CERROR("Can't allocate struct lnet_hdr_nid4\n"); + return -ENOMEM; + } - rc = libcfs_sock_read(sock, &hdr->src_nid, - sizeof (*hdr) - offsetof (lnet_hdr_t, src_nid), - timeout); - if (rc != 0) { - CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - goto out; - } + rc = lnet_sock_read(sock, &hdr->src_nid, + sizeof(*hdr) - offsetof(struct _lnet_hdr_nid4, + src_nid), + timeout); + if (rc != 0) { + CERROR("Error %d reading rest of HELLO hdr from %pISc\n", + rc, &conn->ksnc_peeraddr); + LASSERT(rc < 0 && rc != -EALREADY); + goto out; + } - /* ...and check we got what we expected */ - if (hdr->type != cpu_to_le32 (LNET_MSG_HELLO)) { - CERROR ("Expecting a HELLO hdr," - " but got type %d from %u.%u.%u.%u\n", - le32_to_cpu (hdr->type), - HIPQUAD(conn->ksnc_ipaddr)); - rc = -EPROTO; - goto out; - } + /* ...and check we got what we expected */ + if (hdr->type != cpu_to_le32 (LNET_MSG_HELLO)) { + CERROR("Expecting a HELLO hdr, but got type %d from %pISc\n", + le32_to_cpu(hdr->type), + &conn->ksnc_peeraddr); + rc = -EPROTO; + goto out; + } - hello->kshm_src_nid = le64_to_cpu (hdr->src_nid); - hello->kshm_src_pid = le32_to_cpu (hdr->src_pid); - hello->kshm_src_incarnation = le64_to_cpu (hdr->msg.hello.incarnation); - hello->kshm_ctype = le32_to_cpu (hdr->msg.hello.type); - hello->kshm_nips = le32_to_cpu (hdr->payload_length) / - sizeof (__u32); - - if (hello->kshm_nips > LNET_MAX_INTERFACES) { - CERROR("Bad nips %d from ip %u.%u.%u.%u\n", - hello->kshm_nips, HIPQUAD(conn->ksnc_ipaddr)); - rc = -EPROTO; - goto out; - } + lnet_nid4_to_nid(le64_to_cpu(hdr->src_nid), &hello->kshm_src_nid); + hello->kshm_src_pid = le32_to_cpu(hdr->src_pid); + hello->kshm_src_incarnation = le64_to_cpu(hdr->msg.hello.incarnation); + hello->kshm_ctype = le32_to_cpu(hdr->msg.hello.type); + hello->kshm_nips = le32_to_cpu(hdr->payload_length) / sizeof(__u32); + + if (hello->kshm_nips > LNET_INTERFACES_NUM) { + CERROR("Bad nips %d from ip %pISc\n", + hello->kshm_nips, &conn->ksnc_peeraddr); + rc = -EPROTO; + goto out; + } - if (hello->kshm_nips == 0) - goto out; + if (hello->kshm_nips == 0) + goto out; - rc = libcfs_sock_read(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), timeout); - if (rc != 0) { - CERROR ("Error %d reading IPs from ip %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - goto out; - } + rc = lnet_sock_read(sock, hello->kshm_ips, + hello->kshm_nips * sizeof(__u32), timeout); + if (rc != 0) { + CERROR("Error %d reading IPs from ip %pISc\n", + rc, &conn->ksnc_peeraddr); + LASSERT(rc < 0 && rc != -EALREADY); + goto out; + } - for (i = 0; i < (int) hello->kshm_nips; i++) { - hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]); + for (i = 0; i < (int) hello->kshm_nips; i++) { + hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]); - if (hello->kshm_ips[i] == 0) { - CERROR("Zero IP[%d] from ip %u.%u.%u.%u\n", - i, HIPQUAD(conn->ksnc_ipaddr)); - rc = -EPROTO; - break; - } - } + if (hello->kshm_ips[i] == 0) { + CERROR("Zero IP[%d] from ip %pISc\n", + i, &conn->ksnc_peeraddr); + rc = -EPROTO; + break; + } + } out: - LIBCFS_FREE(hdr, sizeof(*hdr)); + CFS_FREE_PTR(hdr); - return rc; + return rc; } static int -ksocknal_recv_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello, int timeout) +ksocknal_recv_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello, + int timeout) { - cfs_socket_t *sock = conn->ksnc_sock; - int rc; - int i; - - if (hello->kshm_magic == LNET_PROTO_MAGIC) - conn->ksnc_flip = 0; - else - conn->ksnc_flip = 1; - - rc = libcfs_sock_read(sock, &hello->kshm_src_nid, - offsetof(ksock_hello_msg_t, kshm_ips) - - offsetof(ksock_hello_msg_t, kshm_src_nid), - timeout); - if (rc != 0) { - CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - return rc; - } + struct socket *sock = conn->ksnc_sock; + struct ksock_hello_msg_nid4 *hello4 = (void *)hello; + int rc; + int i; + + if (hello->kshm_magic == LNET_PROTO_MAGIC) + conn->ksnc_flip = 0; + else + conn->ksnc_flip = 1; + + rc = lnet_sock_read(sock, &hello4->kshm_src_nid, + offsetof(struct ksock_hello_msg_nid4, kshm_ips) - + offsetof(struct ksock_hello_msg_nid4, kshm_src_nid), + timeout); + if (rc != 0) { + CERROR("Error %d reading HELLO from %pISc\n", + rc, &conn->ksnc_peeraddr); + LASSERT(rc < 0 && rc != -EALREADY); + return rc; + } - if (conn->ksnc_flip) { - __swab32s(&hello->kshm_src_pid); - __swab64s(&hello->kshm_src_nid); - __swab32s(&hello->kshm_dst_pid); - __swab64s(&hello->kshm_dst_nid); - __swab64s(&hello->kshm_src_incarnation); - __swab64s(&hello->kshm_dst_incarnation); - __swab32s(&hello->kshm_ctype); - __swab32s(&hello->kshm_nips); - } + if (conn->ksnc_flip) { + /* These must be copied in reverse order to avoid corruption. */ + hello->kshm_nips = __swab32(hello4->kshm_nips); + hello->kshm_ctype = __swab32(hello4->kshm_ctype); + hello->kshm_dst_incarnation = __swab64(hello4->kshm_dst_incarnation); + hello->kshm_src_incarnation = __swab64(hello4->kshm_src_incarnation); + hello->kshm_dst_pid = __swab32(hello4->kshm_dst_pid); + hello->kshm_src_pid = __swab32(hello4->kshm_src_pid); + lnet_nid4_to_nid(hello4->kshm_dst_nid, &hello->kshm_dst_nid); + lnet_nid4_to_nid(hello4->kshm_src_nid, &hello->kshm_src_nid); + } else { + /* These must be copied in reverse order to avoid corruption. */ + hello->kshm_nips = hello4->kshm_nips; + hello->kshm_ctype = hello4->kshm_ctype; + hello->kshm_dst_incarnation = hello4->kshm_dst_incarnation; + hello->kshm_src_incarnation = hello4->kshm_src_incarnation; + hello->kshm_dst_pid = hello4->kshm_dst_pid; + hello->kshm_src_pid = hello4->kshm_src_pid; + lnet_nid4_to_nid(hello4->kshm_dst_nid, &hello->kshm_dst_nid); + lnet_nid4_to_nid(hello4->kshm_src_nid, &hello->kshm_src_nid); + } - if (hello->kshm_nips > LNET_MAX_INTERFACES) { - CERROR("Bad nips %d from ip %u.%u.%u.%u\n", - hello->kshm_nips, HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } + if (hello->kshm_nips > LNET_INTERFACES_NUM) { + CERROR("Bad nips %d from ip %pISc\n", + hello->kshm_nips, &conn->ksnc_peeraddr); + return -EPROTO; + } - if (hello->kshm_nips == 0) - return 0; + if (hello->kshm_nips == 0) + return 0; - rc = libcfs_sock_read(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), timeout); - if (rc != 0) { - CERROR ("Error %d reading IPs from ip %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - return rc; - } + rc = lnet_sock_read(sock, hello->kshm_ips, + hello->kshm_nips * sizeof(__u32), timeout); + if (rc != 0) { + CERROR("Error %d reading IPs from ip %pISc\n", + rc, &conn->ksnc_peeraddr); + LASSERT(rc < 0 && rc != -EALREADY); + return rc; + } - for (i = 0; i < (int) hello->kshm_nips; i++) { - if (conn->ksnc_flip) - __swab32s(&hello->kshm_ips[i]); + for (i = 0; i < (int) hello->kshm_nips; i++) { + if (conn->ksnc_flip) + __swab32s(&hello->kshm_ips[i]); - if (hello->kshm_ips[i] == 0) { - CERROR("Zero IP[%d] from ip %u.%u.%u.%u\n", - i, HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } - } + if (hello->kshm_ips[i] == 0) { + CERROR("Zero IP[%d] from ip %pISc\n", + i, &conn->ksnc_peeraddr); + return -EPROTO; + } + } - return 0; + return 0; } -static void -ksocknal_pack_msg_v1(ksock_tx_t *tx) +static int +ksocknal_recv_hello_v4(struct ksock_conn *conn, struct ksock_hello_msg *hello, + int timeout) { - /* V1.x has no KSOCK_MSG_NOOP */ - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); - LASSERT(tx->tx_lnetmsg != NULL); + struct socket *sock = conn->ksnc_sock; + int rc; + + if (hello->kshm_magic == LNET_PROTO_MAGIC) + conn->ksnc_flip = 0; + else + conn->ksnc_flip = 1; + + rc = lnet_sock_read(sock, &hello->kshm_src_nid, + sizeof(*hello) - + offsetof(struct ksock_hello_msg, kshm_src_nid), + timeout); + if (rc) { + CERROR("Error %d reading HELLO from %pISc\n", + rc, &conn->ksnc_peeraddr); + LASSERT(rc < 0 && rc != -EALREADY); + return rc; + } - tx->tx_iov[0].iov_base = (void *)&tx->tx_lnetmsg->msg_hdr; - tx->tx_iov[0].iov_len = sizeof(lnet_hdr_t); + if (conn->ksnc_flip) { + __swab32s(&hello->kshm_src_pid); + __swab32s(&hello->kshm_dst_pid); + __swab64s(&hello->kshm_src_incarnation); + __swab64s(&hello->kshm_dst_incarnation); + __swab32s(&hello->kshm_ctype); + } - tx->tx_resid = tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t); + return 0; } static void -ksocknal_pack_msg_v2(ksock_tx_t *tx) +ksocknal_pack_msg_v1(struct ksock_tx *tx) { - tx->tx_iov[0].iov_base = (void *)&tx->tx_msg; + /* V1.x has no KSOCK_MSG_NOOP */ + LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); + LASSERT(tx->tx_lnetmsg != NULL); - if (tx->tx_lnetmsg != NULL) { - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); + lnet_hdr_to_nid4(&tx->tx_lnetmsg->msg_hdr, + &tx->tx_msg.ksm_u.lnetmsg_nid4); + tx->tx_hdr.iov_base = (void *)&tx->tx_msg.ksm_u.lnetmsg_nid4; + tx->tx_hdr.iov_len = sizeof(struct lnet_hdr_nid4); - tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr; - tx->tx_iov[0].iov_len = sizeof(ksock_msg_t); - tx->tx_resid = tx->tx_nob = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len; - } else { - LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP); + tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr_nid4); + tx->tx_resid = tx->tx_nob; +} - tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); - tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); - } - /* Don't checksum before start sending, because packet can be piggybacked with ACK */ +static void +ksocknal_pack_msg_v2(struct ksock_tx *tx) +{ + int hdr_size; + + tx->tx_hdr.iov_base = (void *)&tx->tx_msg; + + switch (tx->tx_msg.ksm_type) { + case KSOCK_MSG_LNET: + LASSERT(tx->tx_lnetmsg != NULL); + hdr_size = (sizeof(struct ksock_msg_hdr) + + sizeof(struct lnet_hdr_nid4)); + + lnet_hdr_to_nid4(&tx->tx_lnetmsg->msg_hdr, + &tx->tx_msg.ksm_u.lnetmsg_nid4); + tx->tx_hdr.iov_len = hdr_size; + tx->tx_resid = tx->tx_nob = hdr_size + tx->tx_lnetmsg->msg_len; + break; + case KSOCK_MSG_NOOP: + LASSERT(tx->tx_lnetmsg == NULL); + hdr_size = sizeof(struct ksock_msg_hdr); + + tx->tx_hdr.iov_len = hdr_size; + tx->tx_resid = tx->tx_nob = hdr_size; + break; + default: + LASSERT(0); + } + /* Don't checksum before start sending, because packet can be + * piggybacked with ACK + */ } static void -ksocknal_unpack_msg_v1(ksock_msg_t *msg) +ksocknal_pack_msg_v4(struct ksock_tx *tx) { - msg->ksm_csum = 0; - msg->ksm_type = KSOCK_MSG_LNET; - msg->ksm_zc_cookies[0] = msg->ksm_zc_cookies[1] = 0; + int hdr_size; + + tx->tx_hdr.iov_base = (void *)&tx->tx_msg; + + switch (tx->tx_msg.ksm_type) { + case KSOCK_MSG_LNET: + LASSERT(tx->tx_lnetmsg != NULL); + hdr_size = (sizeof(struct ksock_msg_hdr) + + sizeof(struct lnet_hdr_nid16)); + + lnet_hdr_to_nid16(&tx->tx_lnetmsg->msg_hdr, + &tx->tx_msg.ksm_u.lnetmsg_nid16); + tx->tx_hdr.iov_len = hdr_size; + tx->tx_resid = tx->tx_nob = hdr_size + tx->tx_lnetmsg->msg_len; + break; + case KSOCK_MSG_NOOP: + LASSERT(tx->tx_lnetmsg == NULL); + hdr_size = sizeof(struct ksock_msg_hdr); + + tx->tx_hdr.iov_len = hdr_size; + tx->tx_resid = tx->tx_nob = hdr_size; + break; + default: + LASSERT(0); + } + /* Don't checksum before start sending, because packet can be + * piggybacked with ACK + */ } static void -ksocknal_unpack_msg_v2(ksock_msg_t *msg) +ksocknal_unpack_msg_v1(struct ksock_msg *msg, struct lnet_hdr *hdr) { - return; /* Do nothing */ + msg->ksm_csum = 0; + msg->ksm_type = KSOCK_MSG_LNET; + msg->ksm_zc_cookies[0] = msg->ksm_zc_cookies[1] = 0; + lnet_hdr_from_nid4(hdr, &msg->ksm_u.lnetmsg_nid4); } -ksock_proto_t ksocknal_protocol_v1x = +static void +ksocknal_unpack_msg_v2(struct ksock_msg *msg, struct lnet_hdr *hdr) +{ + lnet_hdr_from_nid4(hdr, &msg->ksm_u.lnetmsg_nid4); +} + +static void +ksocknal_unpack_msg_v4(struct ksock_msg *msg, struct lnet_hdr *hdr) +{ + lnet_hdr_from_nid16(hdr, &msg->ksm_u.lnetmsg_nid16); +} + +const struct ksock_proto ksocknal_protocol_v1x = { .pro_version = KSOCK_PROTO_V1, .pro_send_hello = ksocknal_send_hello_v1, @@ -766,7 +959,7 @@ ksock_proto_t ksocknal_protocol_v1x = .pro_match_tx = ksocknal_match_tx }; -ksock_proto_t ksocknal_protocol_v2x = +const struct ksock_proto ksocknal_protocol_v2x = { .pro_version = KSOCK_PROTO_V2, .pro_send_hello = ksocknal_send_hello_v2, @@ -780,7 +973,7 @@ ksock_proto_t ksocknal_protocol_v2x = .pro_match_tx = ksocknal_match_tx }; -ksock_proto_t ksocknal_protocol_v3x = +const struct ksock_proto ksocknal_protocol_v3x = { .pro_version = KSOCK_PROTO_V3, .pro_send_hello = ksocknal_send_hello_v2, @@ -794,3 +987,15 @@ ksock_proto_t ksocknal_protocol_v3x = .pro_match_tx = ksocknal_match_tx_v3 }; +const struct ksock_proto ksocknal_protocol_v4x = { + .pro_version = KSOCK_PROTO_V4, + .pro_send_hello = ksocknal_send_hello_v4, + .pro_recv_hello = ksocknal_recv_hello_v4, + .pro_pack = ksocknal_pack_msg_v4, + .pro_unpack = ksocknal_unpack_msg_v4, + .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2, + .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3, + .pro_handle_zcreq = ksocknal_handle_zcreq, + .pro_handle_zcack = ksocknal_handle_zcack, + .pro_match_tx = ksocknal_match_tx_v4, +};