X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Fklnds%2Fsocklnd%2Fsocklnd_proto.c;h=e24f0b4523862185618fc78580654ae09932c4a2;hp=98109ec2ff7bce390751fb3a9c05d1252e76cd23;hb=HEAD;hpb=057d89f5ecc9b28e5861fa037b1fc3512d6576ce diff --git a/lnet/klnds/socklnd/socklnd_proto.c b/lnet/klnds/socklnd/socklnd_proto.c index 98109ec..b90c3d2 100644 --- a/lnet/klnds/socklnd/socklnd_proto.c +++ b/lnet/klnds/socklnd/socklnd_proto.c @@ -1,14 +1,14 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * - * Copyright (c) 2012, Intel Corporation. + * Copyright (c) 2012, 2017, Intel Corporation. * * Author: Zach Brown * Author: Peter J. Braam * Author: Phil Schwan * Author: Eric Barton * - * This file is part of Lustre, https://wiki.hpdd.intel.com/ + * This file is part of Lustre, https://wiki.whamcloud.com/ * * Portals is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -41,8 +41,8 @@ * pro_match_tx() : Called holding glock */ -static ksock_tx_t * -ksocknal_queue_tx_msg_v1(ksock_conn_t *conn, ksock_tx_t *tx_msg) +static struct ksock_tx * +ksocknal_queue_tx_msg_v1(struct ksock_conn *conn, struct ksock_tx *tx_msg) { /* V1.x, just enqueue it */ list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); @@ -50,9 +50,9 @@ ksocknal_queue_tx_msg_v1(ksock_conn_t *conn, ksock_tx_t *tx_msg) } void -ksocknal_next_tx_carrier(ksock_conn_t *conn) +ksocknal_next_tx_carrier(struct ksock_conn *conn) { - ksock_tx_t *tx = conn->ksnc_tx_carrier; + struct ksock_tx *tx = conn->ksnc_tx_carrier; /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */ LASSERT(!list_empty(&conn->ksnc_tx_queue)); @@ -63,18 +63,17 @@ ksocknal_next_tx_carrier(ksock_conn_t *conn) /* no more packets queued */ conn->ksnc_tx_carrier = NULL; } else { - conn->ksnc_tx_carrier = list_entry(tx->tx_list.next, - ksock_tx_t, tx_list); + conn->ksnc_tx_carrier = list_next_entry(tx, tx_list); LASSERT(conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type); } } static int -ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn, - ksock_tx_t *tx_ack, __u64 cookie) +ksocknal_queue_tx_zcack_v2(struct ksock_conn *conn, + struct ksock_tx *tx_ack, __u64 cookie) { - ksock_tx_t *tx = conn->ksnc_tx_carrier; + struct ksock_tx *tx = conn->ksnc_tx_carrier; LASSERT (tx_ack == NULL || tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP); @@ -117,10 +116,10 @@ ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn, return 1; } -static ksock_tx_t * -ksocknal_queue_tx_msg_v2(ksock_conn_t *conn, ksock_tx_t *tx_msg) +static struct ksock_tx * +ksocknal_queue_tx_msg_v2(struct ksock_conn *conn, struct ksock_tx *tx_msg) { - ksock_tx_t *tx = conn->ksnc_tx_carrier; + struct ksock_tx *tx = conn->ksnc_tx_carrier; /* * Enqueue tx_msg: @@ -146,18 +145,17 @@ ksocknal_queue_tx_msg_v2(ksock_conn_t *conn, ksock_tx_t *tx_msg) tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1]; ksocknal_next_tx_carrier(conn); - /* use new_tx to replace the noop zc-ack packet */ - list_add(&tx_msg->tx_list, &tx->tx_list); - list_del(&tx->tx_list); + /* use new_tx to replace the noop zc-ack packet */ + list_splice(&tx->tx_list, &tx_msg->tx_list); - return tx; + return tx; } static int -ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, - ksock_tx_t *tx_ack, __u64 cookie) +ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn, + struct ksock_tx *tx_ack, __u64 cookie) { - ksock_tx_t *tx; + struct ksock_tx *tx; if (conn->ksnc_type != SOCKLND_CONN_ACK) return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie); @@ -190,12 +188,12 @@ ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, return 1; } - if (cookie == tx->tx_msg.ksm_zc_cookies[0] || - cookie == tx->tx_msg.ksm_zc_cookies[1]) { + if (cookie == tx->tx_msg.ksm_zc_cookies[0] || + cookie == tx->tx_msg.ksm_zc_cookies[1]) { CWARN("%s: duplicated ZC cookie: %llu\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie); - return 1; /* XXX return error in the future */ - } + libcfs_idstr(&conn->ksnc_peer->ksnp_id), cookie); + return 1; /* XXX return error in the future */ + } if (tx->tx_msg.ksm_zc_cookies[0] == 0) { /* NOOP tx has only one ZC-ACK cookie, can carry at least one more */ @@ -241,13 +239,16 @@ ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, } } else { - /* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is range of cookies */ - if (cookie >= tx->tx_msg.ksm_zc_cookies[0] && - cookie <= tx->tx_msg.ksm_zc_cookies[1]) { + /* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is a range + * of cookies + */ + if (cookie >= tx->tx_msg.ksm_zc_cookies[0] && + cookie <= tx->tx_msg.ksm_zc_cookies[1]) { CWARN("%s: duplicated ZC cookie: %llu\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie); - return 1; /* XXX: return error in the future */ - } + libcfs_idstr(&conn->ksnc_peer->ksnp_id), + cookie); + return 1; /* XXX: return error in the future */ + } if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) { tx->tx_msg.ksm_zc_cookies[1] = cookie; @@ -271,7 +272,7 @@ ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn, } static int -ksocknal_match_tx(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk) +ksocknal_match_tx(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk) { int nob; @@ -280,14 +281,15 @@ ksocknal_match_tx(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk) return SOCKNAL_MATCH_YES; #endif - if (tx == NULL || tx->tx_lnetmsg == NULL) { - /* noop packet */ - nob = offsetof(struct ksock_msg, ksm_u); - } else { - nob = tx->tx_lnetmsg->msg_len + - ((conn->ksnc_proto == &ksocknal_protocol_v1x) ? - sizeof(struct lnet_hdr) : sizeof(struct ksock_msg)); - } + if (tx == NULL || tx->tx_lnetmsg == NULL) { + /* noop packet */ + nob = sizeof(struct ksock_msg_hdr); + } else { + nob = tx->tx_lnetmsg->msg_len + + ((conn->ksnc_proto == &ksocknal_protocol_v1x) ? + 0 : sizeof(struct ksock_msg_hdr)) + + sizeof(struct lnet_hdr_nid4); + } /* default checking for typed connection */ switch (conn->ksnc_type) { @@ -315,14 +317,16 @@ ksocknal_match_tx(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk) } static int -ksocknal_match_tx_v3(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk) +ksocknal_match_tx_v3(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk) { int nob; - if (tx == NULL || tx->tx_lnetmsg == NULL) - nob = offsetof(struct ksock_msg, ksm_u); - else - nob = tx->tx_lnetmsg->msg_len + sizeof(struct ksock_msg); + if (tx == NULL || tx->tx_lnetmsg == NULL) + nob = sizeof(struct ksock_msg_hdr); + else + nob = sizeof(struct ksock_msg_hdr) + + sizeof(struct lnet_hdr_nid4) + + tx->tx_lnetmsg->msg_len; switch (conn->ksnc_type) { default: @@ -357,20 +361,65 @@ ksocknal_match_tx_v3(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk) } } +static int +ksocknal_match_tx_v4(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk) +{ + int nob; + + if (!tx || !tx->tx_lnetmsg) + nob = sizeof(struct ksock_msg_hdr); + else + nob = sizeof(struct ksock_msg_hdr) + + sizeof(struct lnet_hdr_nid16) + + tx->tx_lnetmsg->msg_len; + + switch (conn->ksnc_type) { + default: + CERROR("ksnc_type bad: %u\n", conn->ksnc_type); + LBUG(); + case SOCKLND_CONN_ANY: + return SOCKNAL_MATCH_NO; + + case SOCKLND_CONN_ACK: + if (nonblk) + return SOCKNAL_MATCH_YES; + else if (tx == NULL || tx->tx_lnetmsg == NULL) + return SOCKNAL_MATCH_MAY; + else + return SOCKNAL_MATCH_NO; + + case SOCKLND_CONN_BULK_OUT: + if (nonblk) + return SOCKNAL_MATCH_NO; + else if (nob < *ksocknal_tunables.ksnd_min_bulk) + return SOCKNAL_MATCH_MAY; + else + return SOCKNAL_MATCH_YES; + + case SOCKLND_CONN_CONTROL: + if (nonblk) + return SOCKNAL_MATCH_NO; + else if (nob >= *ksocknal_tunables.ksnd_min_bulk) + return SOCKNAL_MATCH_MAY; + else + return SOCKNAL_MATCH_YES; + } +} + /* (Sink) handle incoming ZC request from sender */ static int -ksocknal_handle_zcreq(ksock_conn_t *c, __u64 cookie, int remote) +ksocknal_handle_zcreq(struct ksock_conn *c, __u64 cookie, int remote) { - ksock_peer_ni_t *peer_ni = c->ksnc_peer; - ksock_conn_t *conn; - ksock_tx_t *tx; - int rc; + struct ksock_peer_ni *peer_ni = c->ksnc_peer; + struct ksock_conn *conn; + struct ksock_tx *tx; + int rc; read_lock(&ksocknal_data.ksnd_global_lock); conn = ksocknal_find_conn_locked(peer_ni, NULL, !!remote); if (conn != NULL) { - ksock_sched_t *sched = conn->ksnc_scheduler; + struct ksock_sched *sched = conn->ksnc_scheduler; LASSERT(conn->ksnc_proto->pro_queue_tx_zcack != NULL); @@ -393,44 +442,45 @@ ksocknal_handle_zcreq(ksock_conn_t *c, __u64 cookie, int remote) if (tx == NULL) return -ENOMEM; - if ((rc = ksocknal_launch_packet(peer_ni->ksnp_ni, tx, peer_ni->ksnp_id)) == 0) - return 0; + rc = ksocknal_launch_packet(peer_ni->ksnp_ni, tx, &peer_ni->ksnp_id); + if (rc == 0) + return 0; - ksocknal_free_tx(tx); - return rc; + ksocknal_free_tx(tx); + return rc; } /* (Sender) handle ZC_ACK from sink */ static int -ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2) +ksocknal_handle_zcack(struct ksock_conn *conn, __u64 cookie1, __u64 cookie2) { - ksock_peer_ni_t *peer_ni = conn->ksnc_peer; - ksock_tx_t *tx; - ksock_tx_t *tmp; - struct list_head zlist = LIST_HEAD_INIT(zlist); - int count; + struct ksock_peer_ni *peer_ni = conn->ksnc_peer; + struct ksock_tx *tx; + struct ksock_tx *tmp; + LIST_HEAD(zlist); + int count; if (cookie1 == 0) cookie1 = cookie2; count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1); - if (cookie2 == SOCKNAL_KEEPALIVE_PING && - conn->ksnc_proto == &ksocknal_protocol_v3x) { - /* keepalive PING for V3.x, just ignore it */ - return count == 1 ? 0 : -EPROTO; - } + if (cookie2 == SOCKNAL_KEEPALIVE_PING && + (conn->ksnc_proto == &ksocknal_protocol_v3x || + conn->ksnc_proto == &ksocknal_protocol_v4x)) { + /* keepalive PING for V3.x, just ignore it */ + return count == 1 ? 0 : -EPROTO; + } spin_lock(&peer_ni->ksnp_lock); - list_for_each_entry_safe(tx, tmp, - &peer_ni->ksnp_zc_req_list, tx_zc_list) { + list_for_each_entry_safe(tx, tmp, &peer_ni->ksnp_zc_req_list, + tx_zc_list) { __u64 c = tx->tx_msg.ksm_zc_cookies[0]; if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) { tx->tx_msg.ksm_zc_cookies[0] = 0; - list_del(&tx->tx_zc_list); - list_add(&tx->tx_zc_list, &zlist); + list_move(&tx->tx_zc_list, &zlist); if (--count == 0) break; @@ -439,8 +489,8 @@ ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2) spin_unlock(&peer_ni->ksnp_lock); - while (!list_empty(&zlist)) { - tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list); + while ((tx = list_first_entry_or_null(&zlist, struct ksock_tx, + tx_zc_list)) != NULL) { list_del(&tx->tx_zc_list); ksocknal_tx_decref(tx); } @@ -449,315 +499,453 @@ ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2) } static int -ksocknal_send_hello_v1 (ksock_conn_t *conn, struct ksock_hello_msg *hello) +ksocknal_send_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello) { struct socket *sock = conn->ksnc_sock; - struct lnet_hdr *hdr; + struct _lnet_hdr_nid4 *hdr; struct lnet_magicversion *hmv; int rc; int i; - CLASSERT(sizeof(struct lnet_magicversion) == - offsetof(struct lnet_hdr, src_nid)); + BUILD_BUG_ON(sizeof(struct lnet_magicversion) != + offsetof(struct _lnet_hdr_nid4, src_nid)); LIBCFS_ALLOC(hdr, sizeof(*hdr)); if (hdr == NULL) { - CERROR("Can't allocate struct lnet_hdr\n"); + CERROR("Can't allocate struct lnet_hdr_nid4\n"); return -ENOMEM; } hmv = (struct lnet_magicversion *)&hdr->dest_nid; - /* Re-organize V2.x message header to V1.x (struct lnet_hdr) - * header and send out */ - hmv->magic = cpu_to_le32 (LNET_PROTO_TCP_MAGIC); - hmv->version_major = cpu_to_le16 (KSOCK_PROTO_V1_MAJOR); - hmv->version_minor = cpu_to_le16 (KSOCK_PROTO_V1_MINOR); - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto check */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - hmv->version_major++; /* just different! */ - the_lnet.ln_testprotocompat &= ~1; - } - if ((the_lnet.ln_testprotocompat & 2) != 0) { - hmv->magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } + /* Re-organize V2.x message header to V1.x (struct lnet_hdr_nid4) + * header and send out + */ + hmv->magic = cpu_to_le32 (LNET_PROTO_TCP_MAGIC); + hmv->version_major = cpu_to_le16 (KSOCK_PROTO_V1_MAJOR); + hmv->version_minor = cpu_to_le16 (KSOCK_PROTO_V1_MINOR); + + if (the_lnet.ln_testprotocompat) { + /* single-shot proto check */ + if (test_and_clear_bit(0, &the_lnet.ln_testprotocompat)) + hmv->version_major++; /* just different! */ + + if (test_and_clear_bit(1, &the_lnet.ln_testprotocompat)) + hmv->magic = LNET_PROTO_MAGIC; + } - hdr->src_nid = cpu_to_le64 (hello->kshm_src_nid); - hdr->src_pid = cpu_to_le32 (hello->kshm_src_pid); - hdr->type = cpu_to_le32 (LNET_MSG_HELLO); - hdr->payload_length = cpu_to_le32 (hello->kshm_nips * sizeof(__u32)); - hdr->msg.hello.type = cpu_to_le32 (hello->kshm_ctype); - hdr->msg.hello.incarnation = cpu_to_le64 (hello->kshm_src_incarnation); + hdr->src_nid = cpu_to_le64(lnet_nid_to_nid4(&hello->kshm_src_nid)); + hdr->src_pid = cpu_to_le32 (hello->kshm_src_pid); + hdr->type = cpu_to_le32 (LNET_MSG_HELLO); + hdr->payload_length = cpu_to_le32 (hello->kshm_nips * sizeof(__u32)); + hdr->msg.hello.type = cpu_to_le32 (hello->kshm_ctype); + hdr->msg.hello.incarnation = cpu_to_le64 (hello->kshm_src_incarnation); rc = lnet_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout()); - if (rc != 0) { - CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n", - rc, &conn->ksnc_ipaddr, conn->ksnc_port); - goto out; - } + if (rc != 0) { + CNETERR("Error %d sending HELLO hdr to %pIScp\n", + rc, &conn->ksnc_peeraddr); + goto out; + } - if (hello->kshm_nips == 0) - goto out; + if (hello->kshm_nips == 0) + goto out; - for (i = 0; i < (int) hello->kshm_nips; i++) { - hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]); - } + for (i = 0; i < (int) hello->kshm_nips; i++) + hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]); rc = lnet_sock_write(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), - lnet_acceptor_timeout()); - if (rc != 0) { - CNETERR("Error %d sending HELLO payload (%d)" - " to %pI4h/%d\n", rc, hello->kshm_nips, - &conn->ksnc_ipaddr, conn->ksnc_port); - } + hello->kshm_nips * sizeof(__u32), + lnet_acceptor_timeout()); + if (rc != 0) { + CNETERR("Error %d sending HELLO payload (%d) to %pIScp\n", + rc, hello->kshm_nips, + &conn->ksnc_peeraddr); + } out: - LIBCFS_FREE(hdr, sizeof(*hdr)); + LIBCFS_FREE(hdr, sizeof(*hdr)); - return rc; + return rc; } static int -ksocknal_send_hello_v2 (ksock_conn_t *conn, struct ksock_hello_msg *hello) +ksocknal_send_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello) { - struct socket *sock = conn->ksnc_sock; - int rc; - - hello->kshm_magic = LNET_PROTO_MAGIC; - hello->kshm_version = conn->ksnc_proto->pro_version; - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto check */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - hello->kshm_version++; /* just different! */ - the_lnet.ln_testprotocompat &= ~1; - } - LNET_UNLOCK(); - } + struct socket *sock = conn->ksnc_sock; + int rc; + struct ksock_hello_msg_nid4 *hello4; - rc = lnet_sock_write(sock, hello, offsetof(struct ksock_hello_msg, kshm_ips), - lnet_acceptor_timeout()); + CFS_ALLOC_PTR(hello4); + if (!hello4) { + CERROR("Can't allocate struct ksock_hello_msg_nid4\n"); + return -ENOMEM; + } - if (rc != 0) { - CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n", - rc, &conn->ksnc_ipaddr, conn->ksnc_port); - return rc; - } + hello->kshm_magic = LNET_PROTO_MAGIC; + hello->kshm_version = conn->ksnc_proto->pro_version; + + hello4->kshm_magic = LNET_PROTO_MAGIC; + hello4->kshm_version = conn->ksnc_proto->pro_version; + hello4->kshm_src_nid = lnet_nid_to_nid4(&hello->kshm_src_nid); + hello4->kshm_dst_nid = lnet_nid_to_nid4(&hello->kshm_dst_nid); + hello4->kshm_src_pid = hello->kshm_src_pid; + hello4->kshm_dst_pid = hello->kshm_dst_pid; + hello4->kshm_src_incarnation = hello->kshm_src_incarnation; + hello4->kshm_dst_incarnation = hello->kshm_dst_incarnation; + hello4->kshm_ctype = hello->kshm_ctype; + hello4->kshm_nips = hello->kshm_nips; + + if (the_lnet.ln_testprotocompat) { + /* single-shot proto check */ + if (test_and_clear_bit(0, &the_lnet.ln_testprotocompat)) + hello->kshm_version++; /* just different! */ + } + hello4->kshm_magic = LNET_PROTO_MAGIC; + hello4->kshm_version = hello->kshm_version; + hello4->kshm_src_nid = lnet_nid_to_nid4(&hello->kshm_src_nid); + hello4->kshm_dst_nid = lnet_nid_to_nid4(&hello->kshm_dst_nid); + hello4->kshm_src_pid = hello->kshm_src_pid; + hello4->kshm_dst_pid = hello->kshm_dst_pid; + hello4->kshm_src_incarnation = hello->kshm_src_incarnation; + hello4->kshm_dst_incarnation = hello->kshm_dst_incarnation; + hello4->kshm_ctype = hello->kshm_ctype; + hello4->kshm_nips = hello->kshm_nips; + + rc = lnet_sock_write(sock, hello4, sizeof(*hello4), + lnet_acceptor_timeout()); + CFS_FREE_PTR(hello4); + if (rc) { + CNETERR("Error %d sending HELLO hdr to %pIScp\n", + rc, &conn->ksnc_peeraddr); + return rc; + } - if (hello->kshm_nips == 0) - return 0; + if (hello->kshm_nips == 0) + return 0; rc = lnet_sock_write(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), - lnet_acceptor_timeout()); - if (rc != 0) { - CNETERR("Error %d sending HELLO payload (%d)" - " to %pI4h/%d\n", rc, hello->kshm_nips, - &conn->ksnc_ipaddr, conn->ksnc_port); - } + hello->kshm_nips * sizeof(__u32), + lnet_acceptor_timeout()); + if (rc != 0) { + CNETERR("Error %d sending HELLO payload (%d) to %pIScp\n", rc, + hello->kshm_nips, + &conn->ksnc_peeraddr); + } - return rc; + return rc; } static int -ksocknal_recv_hello_v1(ksock_conn_t *conn, struct ksock_hello_msg *hello,int timeout) +ksocknal_send_hello_v4(struct ksock_conn *conn, struct ksock_hello_msg *hello) { struct socket *sock = conn->ksnc_sock; - struct lnet_hdr *hdr; + int rc; + + hello->kshm_magic = LNET_PROTO_MAGIC; + hello->kshm_version = conn->ksnc_proto->pro_version; + + rc = lnet_sock_write(sock, hello, sizeof(*hello), + lnet_acceptor_timeout()); + + if (rc != 0) + CNETERR("Error %d sending HELLO hdr to %pIScp\n", + rc, &conn->ksnc_peeraddr); + return rc; +} + +static int +ksocknal_recv_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello, + int timeout) +{ + struct socket *sock = conn->ksnc_sock; + struct _lnet_hdr_nid4 *hdr; int rc; int i; - LIBCFS_ALLOC(hdr, sizeof(*hdr)); - if (hdr == NULL) { - CERROR("Can't allocate struct lnet_hdr\n"); + CFS_ALLOC_PTR(hdr); + if (!hdr) { + CERROR("Can't allocate struct lnet_hdr_nid4\n"); return -ENOMEM; } rc = lnet_sock_read(sock, &hdr->src_nid, - sizeof(*hdr) - offsetof(struct lnet_hdr, src_nid), - timeout); + sizeof(*hdr) - offsetof(struct _lnet_hdr_nid4, + src_nid), + timeout); if (rc != 0) { - CERROR("Error %d reading rest of HELLO hdr from %pI4h\n", - rc, &conn->ksnc_ipaddr); + CERROR("Error %d reading rest of HELLO hdr from %pISc\n", + rc, &conn->ksnc_peeraddr); LASSERT(rc < 0 && rc != -EALREADY); goto out; } - /* ...and check we got what we expected */ - if (hdr->type != cpu_to_le32 (LNET_MSG_HELLO)) { - CERROR ("Expecting a HELLO hdr," - " but got type %d from %pI4h\n", - le32_to_cpu (hdr->type), - &conn->ksnc_ipaddr); - rc = -EPROTO; - goto out; - } + /* ...and check we got what we expected */ + if (hdr->type != cpu_to_le32 (LNET_MSG_HELLO)) { + CERROR("Expecting a HELLO hdr, but got type %d from %pISc\n", + le32_to_cpu(hdr->type), + &conn->ksnc_peeraddr); + rc = -EPROTO; + goto out; + } - hello->kshm_src_nid = le64_to_cpu (hdr->src_nid); - hello->kshm_src_pid = le32_to_cpu (hdr->src_pid); - hello->kshm_src_incarnation = le64_to_cpu (hdr->msg.hello.incarnation); - hello->kshm_ctype = le32_to_cpu (hdr->msg.hello.type); - hello->kshm_nips = le32_to_cpu (hdr->payload_length) / - sizeof (__u32); + lnet_nid4_to_nid(le64_to_cpu(hdr->src_nid), &hello->kshm_src_nid); + hello->kshm_src_pid = le32_to_cpu(hdr->src_pid); + hello->kshm_src_incarnation = le64_to_cpu(hdr->msg.hello.incarnation); + hello->kshm_ctype = le32_to_cpu(hdr->msg.hello.type); + hello->kshm_nips = le32_to_cpu(hdr->payload_length) / sizeof(__u32); - if (hello->kshm_nips > LNET_NUM_INTERFACES) { - CERROR("Bad nips %d from ip %pI4h\n", - hello->kshm_nips, &conn->ksnc_ipaddr); + if (hello->kshm_nips > LNET_INTERFACES_NUM) { + CERROR("Bad nips %d from ip %pISc\n", + hello->kshm_nips, &conn->ksnc_peeraddr); rc = -EPROTO; goto out; } - if (hello->kshm_nips == 0) - goto out; + if (hello->kshm_nips == 0) + goto out; rc = lnet_sock_read(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), timeout); - if (rc != 0) { - CERROR("Error %d reading IPs from ip %pI4h\n", - rc, &conn->ksnc_ipaddr); + hello->kshm_nips * sizeof(__u32), timeout); + if (rc != 0) { + CERROR("Error %d reading IPs from ip %pISc\n", + rc, &conn->ksnc_peeraddr); LASSERT(rc < 0 && rc != -EALREADY); - goto out; - } + goto out; + } - for (i = 0; i < (int) hello->kshm_nips; i++) { - hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]); + for (i = 0; i < (int) hello->kshm_nips; i++) { + hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]); - if (hello->kshm_ips[i] == 0) { - CERROR("Zero IP[%d] from ip %pI4h\n", - i, &conn->ksnc_ipaddr); - rc = -EPROTO; - break; - } - } + if (hello->kshm_ips[i] == 0) { + CERROR("Zero IP[%d] from ip %pISc\n", + i, &conn->ksnc_peeraddr); + rc = -EPROTO; + break; + } + } out: - LIBCFS_FREE(hdr, sizeof(*hdr)); + CFS_FREE_PTR(hdr); - return rc; + return rc; } static int -ksocknal_recv_hello_v2(ksock_conn_t *conn, struct ksock_hello_msg *hello, +ksocknal_recv_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello, int timeout) { - struct socket *sock = conn->ksnc_sock; - int rc; - int i; + struct socket *sock = conn->ksnc_sock; + struct ksock_hello_msg_nid4 *hello4 = (void *)hello; + int rc; + int i; - if (hello->kshm_magic == LNET_PROTO_MAGIC) - conn->ksnc_flip = 0; - else - conn->ksnc_flip = 1; + if (hello->kshm_magic == LNET_PROTO_MAGIC) + conn->ksnc_flip = 0; + else + conn->ksnc_flip = 1; - rc = lnet_sock_read(sock, &hello->kshm_src_nid, - offsetof(struct ksock_hello_msg, kshm_ips) - - offsetof(struct ksock_hello_msg, kshm_src_nid), - timeout); - if (rc != 0) { - CERROR("Error %d reading HELLO from %pI4h\n", - rc, &conn->ksnc_ipaddr); + rc = lnet_sock_read(sock, &hello4->kshm_src_nid, + offsetof(struct ksock_hello_msg_nid4, kshm_ips) - + offsetof(struct ksock_hello_msg_nid4, kshm_src_nid), + timeout); + if (rc != 0) { + CERROR("Error %d reading HELLO from %pISc\n", + rc, &conn->ksnc_peeraddr); LASSERT(rc < 0 && rc != -EALREADY); - return rc; - } + return rc; + } - if (conn->ksnc_flip) { - __swab32s(&hello->kshm_src_pid); - __swab64s(&hello->kshm_src_nid); - __swab32s(&hello->kshm_dst_pid); - __swab64s(&hello->kshm_dst_nid); - __swab64s(&hello->kshm_src_incarnation); - __swab64s(&hello->kshm_dst_incarnation); - __swab32s(&hello->kshm_ctype); - __swab32s(&hello->kshm_nips); - } + if (conn->ksnc_flip) { + /* These must be copied in reverse order to avoid corruption. */ + hello->kshm_nips = __swab32(hello4->kshm_nips); + hello->kshm_ctype = __swab32(hello4->kshm_ctype); + hello->kshm_dst_incarnation = __swab64(hello4->kshm_dst_incarnation); + hello->kshm_src_incarnation = __swab64(hello4->kshm_src_incarnation); + hello->kshm_dst_pid = __swab32(hello4->kshm_dst_pid); + hello->kshm_src_pid = __swab32(hello4->kshm_src_pid); + lnet_nid4_to_nid(hello4->kshm_dst_nid, &hello->kshm_dst_nid); + lnet_nid4_to_nid(hello4->kshm_src_nid, &hello->kshm_src_nid); + } else { + /* These must be copied in reverse order to avoid corruption. */ + hello->kshm_nips = hello4->kshm_nips; + hello->kshm_ctype = hello4->kshm_ctype; + hello->kshm_dst_incarnation = hello4->kshm_dst_incarnation; + hello->kshm_src_incarnation = hello4->kshm_src_incarnation; + hello->kshm_dst_pid = hello4->kshm_dst_pid; + hello->kshm_src_pid = hello4->kshm_src_pid; + lnet_nid4_to_nid(hello4->kshm_dst_nid, &hello->kshm_dst_nid); + lnet_nid4_to_nid(hello4->kshm_src_nid, &hello->kshm_src_nid); + } - if (hello->kshm_nips > LNET_NUM_INTERFACES) { - CERROR("Bad nips %d from ip %pI4h\n", - hello->kshm_nips, &conn->ksnc_ipaddr); + if (hello->kshm_nips > LNET_INTERFACES_NUM) { + CERROR("Bad nips %d from ip %pISc\n", + hello->kshm_nips, &conn->ksnc_peeraddr); return -EPROTO; } - if (hello->kshm_nips == 0) - return 0; + if (hello->kshm_nips == 0) + return 0; rc = lnet_sock_read(sock, hello->kshm_ips, hello->kshm_nips * sizeof(__u32), timeout); - if (rc != 0) { - CERROR("Error %d reading IPs from ip %pI4h\n", - rc, &conn->ksnc_ipaddr); + if (rc != 0) { + CERROR("Error %d reading IPs from ip %pISc\n", + rc, &conn->ksnc_peeraddr); LASSERT(rc < 0 && rc != -EALREADY); - return rc; - } + return rc; + } - for (i = 0; i < (int) hello->kshm_nips; i++) { - if (conn->ksnc_flip) - __swab32s(&hello->kshm_ips[i]); + for (i = 0; i < (int) hello->kshm_nips; i++) { + if (conn->ksnc_flip) + __swab32s(&hello->kshm_ips[i]); - if (hello->kshm_ips[i] == 0) { - CERROR("Zero IP[%d] from ip %pI4h\n", - i, &conn->ksnc_ipaddr); - return -EPROTO; - } - } + if (hello->kshm_ips[i] == 0) { + CERROR("Zero IP[%d] from ip %pISc\n", + i, &conn->ksnc_peeraddr); + return -EPROTO; + } + } - return 0; + return 0; +} + +static int +ksocknal_recv_hello_v4(struct ksock_conn *conn, struct ksock_hello_msg *hello, + int timeout) +{ + struct socket *sock = conn->ksnc_sock; + int rc; + + if (hello->kshm_magic == LNET_PROTO_MAGIC) + conn->ksnc_flip = 0; + else + conn->ksnc_flip = 1; + + rc = lnet_sock_read(sock, &hello->kshm_src_nid, + sizeof(*hello) - + offsetof(struct ksock_hello_msg, kshm_src_nid), + timeout); + if (rc) { + CERROR("Error %d reading HELLO from %pISc\n", + rc, &conn->ksnc_peeraddr); + LASSERT(rc < 0 && rc != -EALREADY); + return rc; + } + + if (conn->ksnc_flip) { + __swab32s(&hello->kshm_src_pid); + __swab32s(&hello->kshm_dst_pid); + __swab64s(&hello->kshm_src_incarnation); + __swab64s(&hello->kshm_dst_incarnation); + __swab32s(&hello->kshm_ctype); + } + + return 0; } static void -ksocknal_pack_msg_v1(ksock_tx_t *tx) +ksocknal_pack_msg_v1(struct ksock_tx *tx) { /* V1.x has no KSOCK_MSG_NOOP */ LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); LASSERT(tx->tx_lnetmsg != NULL); - tx->tx_iov[0].iov_base = (void *)&tx->tx_lnetmsg->msg_hdr; - tx->tx_iov[0].iov_len = sizeof(struct lnet_hdr); + lnet_hdr_to_nid4(&tx->tx_lnetmsg->msg_hdr, + &tx->tx_msg.ksm_u.lnetmsg_nid4); + tx->tx_hdr.iov_base = (void *)&tx->tx_msg.ksm_u.lnetmsg_nid4; + tx->tx_hdr.iov_len = sizeof(struct lnet_hdr_nid4); - tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr); + tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr_nid4); tx->tx_resid = tx->tx_nob; } static void -ksocknal_pack_msg_v2(ksock_tx_t *tx) +ksocknal_pack_msg_v2(struct ksock_tx *tx) { - tx->tx_iov[0].iov_base = (void *)&tx->tx_msg; - - if (tx->tx_lnetmsg != NULL) { - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); + int hdr_size; + + tx->tx_hdr.iov_base = (void *)&tx->tx_msg; + + switch (tx->tx_msg.ksm_type) { + case KSOCK_MSG_LNET: + LASSERT(tx->tx_lnetmsg != NULL); + hdr_size = (sizeof(struct ksock_msg_hdr) + + sizeof(struct lnet_hdr_nid4)); + + lnet_hdr_to_nid4(&tx->tx_lnetmsg->msg_hdr, + &tx->tx_msg.ksm_u.lnetmsg_nid4); + tx->tx_hdr.iov_len = hdr_size; + tx->tx_resid = tx->tx_nob = hdr_size + tx->tx_lnetmsg->msg_len; + break; + case KSOCK_MSG_NOOP: + LASSERT(tx->tx_lnetmsg == NULL); + hdr_size = sizeof(struct ksock_msg_hdr); + + tx->tx_hdr.iov_len = hdr_size; + tx->tx_resid = tx->tx_nob = hdr_size; + break; + default: + LASSERT(0); + } + /* Don't checksum before start sending, because packet can be + * piggybacked with ACK + */ +} - tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr; - tx->tx_iov[0].iov_len = sizeof(struct ksock_msg); - tx->tx_resid = tx->tx_nob = sizeof(struct ksock_msg) + tx->tx_lnetmsg->msg_len; - } else { - LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP); +static void +ksocknal_pack_msg_v4(struct ksock_tx *tx) +{ + int hdr_size; + + tx->tx_hdr.iov_base = (void *)&tx->tx_msg; + + switch (tx->tx_msg.ksm_type) { + case KSOCK_MSG_LNET: + LASSERT(tx->tx_lnetmsg != NULL); + hdr_size = (sizeof(struct ksock_msg_hdr) + + sizeof(struct lnet_hdr_nid16)); + + lnet_hdr_to_nid16(&tx->tx_lnetmsg->msg_hdr, + &tx->tx_msg.ksm_u.lnetmsg_nid16); + tx->tx_hdr.iov_len = hdr_size; + tx->tx_resid = tx->tx_nob = hdr_size + tx->tx_lnetmsg->msg_len; + break; + case KSOCK_MSG_NOOP: + LASSERT(tx->tx_lnetmsg == NULL); + hdr_size = sizeof(struct ksock_msg_hdr); + + tx->tx_hdr.iov_len = hdr_size; + tx->tx_resid = tx->tx_nob = hdr_size; + break; + default: + LASSERT(0); + } + /* Don't checksum before start sending, because packet can be + * piggybacked with ACK + */ +} - tx->tx_iov[0].iov_len = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr); - tx->tx_resid = tx->tx_nob = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr); - } - /* Don't checksum before start sending, because packet can be piggybacked with ACK */ +static void +ksocknal_unpack_msg_v1(struct ksock_msg *msg, struct lnet_hdr *hdr) +{ + msg->ksm_csum = 0; + msg->ksm_type = KSOCK_MSG_LNET; + msg->ksm_zc_cookies[0] = msg->ksm_zc_cookies[1] = 0; + lnet_hdr_from_nid4(hdr, &msg->ksm_u.lnetmsg_nid4); } static void -ksocknal_unpack_msg_v1(struct ksock_msg *msg) +ksocknal_unpack_msg_v2(struct ksock_msg *msg, struct lnet_hdr *hdr) { - msg->ksm_csum = 0; - msg->ksm_type = KSOCK_MSG_LNET; - msg->ksm_zc_cookies[0] = msg->ksm_zc_cookies[1] = 0; + lnet_hdr_from_nid4(hdr, &msg->ksm_u.lnetmsg_nid4); } static void -ksocknal_unpack_msg_v2(struct ksock_msg *msg) +ksocknal_unpack_msg_v4(struct ksock_msg *msg, struct lnet_hdr *hdr) { - return; /* Do nothing */ + lnet_hdr_from_nid16(hdr, &msg->ksm_u.lnetmsg_nid16); } -ksock_proto_t ksocknal_protocol_v1x = +const struct ksock_proto ksocknal_protocol_v1x = { .pro_version = KSOCK_PROTO_V1, .pro_send_hello = ksocknal_send_hello_v1, @@ -771,7 +959,7 @@ ksock_proto_t ksocknal_protocol_v1x = .pro_match_tx = ksocknal_match_tx }; -ksock_proto_t ksocknal_protocol_v2x = +const struct ksock_proto ksocknal_protocol_v2x = { .pro_version = KSOCK_PROTO_V2, .pro_send_hello = ksocknal_send_hello_v2, @@ -785,7 +973,7 @@ ksock_proto_t ksocknal_protocol_v2x = .pro_match_tx = ksocknal_match_tx }; -ksock_proto_t ksocknal_protocol_v3x = +const struct ksock_proto ksocknal_protocol_v3x = { .pro_version = KSOCK_PROTO_V3, .pro_send_hello = ksocknal_send_hello_v2, @@ -799,3 +987,15 @@ ksock_proto_t ksocknal_protocol_v3x = .pro_match_tx = ksocknal_match_tx_v3 }; +const struct ksock_proto ksocknal_protocol_v4x = { + .pro_version = KSOCK_PROTO_V4, + .pro_send_hello = ksocknal_send_hello_v4, + .pro_recv_hello = ksocknal_recv_hello_v4, + .pro_pack = ksocknal_pack_msg_v4, + .pro_unpack = ksocknal_unpack_msg_v4, + .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2, + .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3, + .pro_handle_zcreq = ksocknal_handle_zcreq, + .pro_handle_zcack = ksocknal_handle_zcack, + .pro_match_tx = ksocknal_match_tx_v4, +};