+ CERROR("Error %d reading or checking hello from from %pIS\n",
+ rc, &conn->ksnc_peeraddr);
+ LASSERT(rc < 0);
+ return rc;
+ }
+
+ *incarnation = hello->kshm_src_incarnation;
+
+ if (hello->kshm_src_nid == LNET_NID_ANY) {
+ CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY from %pIS\n",
+ &conn->ksnc_peeraddr);
+ return -EPROTO;
+ }
+
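+ /* Passive connection from an unprivileged port: treat the peer as
+ * a userspace process and derive its id from the socket address
+ * rather than from the HELLO.
+ */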
+ if (!active &&
+ rpc_get_port((struct sockaddr *)&conn->ksnc_peeraddr) >
+ LNET_ACCEPTOR_MAX_RESERVED_PORT) {
+ /* Userspace NAL assigns peer_ni process ID from socket */
+ recv_id.pid = rpc_get_port((struct sockaddr *)
+ &conn->ksnc_peeraddr) |
+ LNET_PID_USERFLAG;
+ LASSERT(conn->ksnc_peeraddr.ss_family == AF_INET);
+ recv_id.nid = LNET_MKNID(
+ LNET_NIDNET(ni->ni_nid),
+ ntohl(((struct sockaddr_in *)
+ &conn->ksnc_peeraddr)->sin_addr.s_addr));
+ } else {
+ recv_id.nid = hello->kshm_src_nid;
+ recv_id.pid = hello->kshm_src_pid;
+ }
+
+ if (!active) {
+ *peerid = recv_id;
+
+ /* peer_ni determines type */
+ conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype);
+ if (conn->ksnc_type == SOCKLND_CONN_NONE) {
+ CERROR("Unexpected type %d from %s ip %pIS\n",
+ hello->kshm_ctype, libcfs_id2str(*peerid),
+ &conn->ksnc_peeraddr);
+ return -EPROTO;
+ }
+ return 0;
+ }
+
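+ /* Active connect: check the peer is who we expected to reach */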
+ if (peerid->pid != recv_id.pid ||
+ peerid->nid != recv_id.nid) {
+ LCONSOLE_ERROR_MSG(0x130,
+ "Connected successfully to %s on host %pIS, but they claimed they were %s; please check your Lustre configuration.\n",
+ libcfs_id2str(*peerid),
+ &conn->ksnc_peeraddr,
+ libcfs_id2str(recv_id));
+ return -EPROTO;
+ }
+
+ if (hello->kshm_ctype == SOCKLND_CONN_NONE) {
+ /* Possible protocol mismatch or I lost the connection race;
+ * return a +ve errno so the caller knows to retry.
+ */
+ return proto_match ? EALREADY : EPROTO;
+ }
+
+ if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) {
+ CERROR("Mismatched types: me %d, %s ip %pIS %d\n",
+ conn->ksnc_type, libcfs_id2str(*peerid),
+ &conn->ksnc_peeraddr,
+ hello->kshm_ctype);
+ return -EPROTO;
+ }
+ return 0;
+}
+
+static bool
+ksocknal_connect(struct ksock_conn_cb *conn_cb)
+{
+ LIST_HEAD(zombies);
+ struct ksock_peer_ni *peer_ni = conn_cb->ksnr_peer;
+ int type;
+ int wanted;
+ struct socket *sock;
+ time64_t deadline;
+ bool retry_later = false;
+ int rc = 0;
+
+ deadline = ktime_get_seconds() + ksocknal_timeout();
+
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+ LASSERT(conn_cb->ksnr_scheduled);
+ LASSERT(!conn_cb->ksnr_connecting);
+
+ conn_cb->ksnr_connecting = 1;
+
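+ /* Open one connection per pass until every wanted connection type
+ * exists, the peer_ni/conn_cb goes away, or the peer_ni starts
+ * connecting to me.
+ */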
+ for (;;) {
+ wanted = ksocknal_conn_cb_mask() & ~conn_cb->ksnr_connected;
+
+ /* stop connecting if peer_ni/cb got closed under me, or
+ * conn cb got connected while queued
+ */
+ if (peer_ni->ksnp_closing || conn_cb->ksnr_deleted ||
+ wanted == 0) {
+ retry_later = false;
+ break;
+ }
+
+ /* reschedule if peer_ni is connecting to me */
+ if (peer_ni->ksnp_accepting > 0) {
+ CDEBUG(D_NET,
+ "peer_ni %s(%d) already connecting to me, retry later.\n",
+ libcfs_nid2str(peer_ni->ksnp_id.nid), peer_ni->ksnp_accepting);
+ retry_later = true;
+ }
+
+ if (retry_later) /* needs reschedule */
+ break;
+
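+ /* Pick the next connection type to establish: ANY first, then
+ * CONTROL, then BULK_IN while it has no more connections than
+ * BULK_OUT, otherwise BULK_OUT.
+ */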
+ if ((wanted & BIT(SOCKLND_CONN_ANY)) != 0) {
+ type = SOCKLND_CONN_ANY;
+ } else if ((wanted & BIT(SOCKLND_CONN_CONTROL)) != 0) {
+ type = SOCKLND_CONN_CONTROL;
+ } else if ((wanted & BIT(SOCKLND_CONN_BULK_IN)) != 0 &&
+ conn_cb->ksnr_blki_conn_count <= conn_cb->ksnr_blko_conn_count) {
+ type = SOCKLND_CONN_BULK_IN;
+ } else {
+ LASSERT((wanted & BIT(SOCKLND_CONN_BULK_OUT)) != 0);
+ type = SOCKLND_CONN_BULK_OUT;
+ }
+
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+ if (ktime_get_seconds() >= deadline) {
+ rc = -ETIMEDOUT;
+ lnet_connect_console_error(rc, peer_ni->ksnp_id.nid,
+ (struct sockaddr *)
+ &conn_cb->ksnr_addr);
+ goto failed;
+ }
+
+ sock = lnet_connect(peer_ni->ksnp_id.nid,
+ conn_cb->ksnr_myiface,
+ (struct sockaddr *)&conn_cb->ksnr_addr,
+ peer_ni->ksnp_ni->ni_net_ns);
+ if (IS_ERR(sock)) {
+ rc = PTR_ERR(sock);
+ goto failed;
+ }
+
+ rc = ksocknal_create_conn(peer_ni->ksnp_ni, conn_cb, sock,
+ type);
+ if (rc < 0) {
+ lnet_connect_console_error(rc, peer_ni->ksnp_id.nid,
+ (struct sockaddr *)
+ &conn_cb->ksnr_addr);
+ goto failed;
+ }
+
+ /* A +ve rc means I have to retry because I lost the connection
+ * race or I have to renegotiate the protocol version.
+ */
+ retry_later = (rc != 0);
+ if (retry_later)
+ CDEBUG(D_NET, "peer_ni %s: conn race, retry later.\n",
+ libcfs_nid2str(peer_ni->ksnp_id.nid));
+
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+ }
+
+ conn_cb->ksnr_scheduled = 0;
+ conn_cb->ksnr_connecting = 0;
+
+ if (retry_later) {
+ /* re-queue for attention; this frees me up to handle
+ * the peer_ni's incoming connection request
+ */
+
+ if (rc == EALREADY ||
+ (rc == 0 && peer_ni->ksnp_accepting > 0)) {
+ /* We want to introduce a delay before the next attempt to
+ * connect if we lost the connection race, but the race is
+ * usually resolved quickly, so min_reconnectms should be a
+ * good heuristic.
+ */
+ conn_cb->ksnr_retry_interval =
+ *ksocknal_tunables.ksnd_min_reconnectms / 1000;
+ conn_cb->ksnr_timeout = ktime_get_seconds() +
+ conn_cb->ksnr_retry_interval;
+ }
+
+ ksocknal_launch_connection_locked(conn_cb);
+ }
+
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+ return retry_later;
+
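+ /* The connection attempt failed: apply exponential backoff before
+ * the next attempt and, if nothing else can deliver them, fail any
+ * queued txs.
+ */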
+ failed:
+ write_lock_bh(&ksocknal_data.ksnd_global_lock);
+
+ conn_cb->ksnr_scheduled = 0;
+ conn_cb->ksnr_connecting = 0;
+
+ /* This is a retry rather than a new connection */
+ conn_cb->ksnr_retry_interval *= 2;
+ conn_cb->ksnr_retry_interval =
+ max_t(time64_t, conn_cb->ksnr_retry_interval,
+ *ksocknal_tunables.ksnd_min_reconnectms / 1000);
+ conn_cb->ksnr_retry_interval =
+ min_t(time64_t, conn_cb->ksnr_retry_interval,
+ *ksocknal_tunables.ksnd_max_reconnectms / 1000);
+
+ LASSERT(conn_cb->ksnr_retry_interval);
+ conn_cb->ksnr_timeout = ktime_get_seconds() +
+ conn_cb->ksnr_retry_interval;
+
+ if (!list_empty(&peer_ni->ksnp_tx_queue) &&
+ peer_ni->ksnp_accepting == 0 &&
+ !ksocknal_find_connecting_conn_cb_locked(peer_ni)) {
+ struct ksock_conn *conn;
+
+ /* ksnp_tx_queue is queued on a conn on successful
+ * connection for V1.x and V2.x
+ */
+ conn = list_first_entry_or_null(&peer_ni->ksnp_conns,
+ struct ksock_conn, ksnc_list);
+ if (conn)
+ LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);
+
+ /* take all the blocked packets while I've got the lock and
+ * complete below...
+ */
+ list_splice_init(&peer_ni->ksnp_tx_queue, &zombies);
+ }
+
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+
+ ksocknal_peer_failed(peer_ni);
+ ksocknal_txlist_done(peer_ni->ksnp_ni, &zombies, rc);
+ return false;
+}
+
+/*
+ * Check whether we need to create more connds.
+ * It will try to create a new thread if necessary; @timeout can be
+ * updated if thread creation fails, so the caller won't keep retrying
+ * while running out of resources.
+ */
+static int
+ksocknal_connd_check_start(time64_t sec, long *timeout)
+{
+ char name[16];
+ int rc;
+ int total = ksocknal_data.ksnd_connd_starting +
+ ksocknal_data.ksnd_connd_running;
+
+ if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) {
+ /* still initializing */
+ return 0;