X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Fklnds%2Fsocklnd%2Fsocklnd.c;h=c2b0a96984b87b5e9b7d792f37f1be49fdc3c0ed;hb=7e26413aa85fdc931721cde36bae3bf2bb97e63f;hp=ab4adb4a10208e092bc5d9846ba05430a23600e7;hpb=a5cbe7883db6d77b82fbd83ad4c662499421d229;p=fs%2Flustre-release.git diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index ab4adb4..c2b0a96 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -152,14 +152,14 @@ ksocknal_destroy_conn_cb(struct ksock_conn_cb *conn_cb) } static struct ksock_peer_ni * -ksocknal_create_peer(struct lnet_ni *ni, struct lnet_process_id id) +ksocknal_create_peer(struct lnet_ni *ni, struct lnet_processid *id) { - int cpt = lnet_cpt_of_nid(id.nid, ni); + int cpt = lnet_nid2cpt(&id->nid, ni); struct ksock_net *net = ni->ni_data; struct ksock_peer_ni *peer_ni; - LASSERT(id.nid != LNET_NID_ANY); - LASSERT(id.pid != LNET_PID_ANY); + LASSERT(!LNET_NID_IS_ANY(&id->nid)); + LASSERT(id->pid != LNET_PID_ANY); LASSERT(!in_interrupt()); if (!atomic_inc_unless_negative(&net->ksnn_npeers)) { @@ -174,7 +174,7 @@ ksocknal_create_peer(struct lnet_ni *ni, struct lnet_process_id id) } peer_ni->ksnp_ni = ni; - peer_ni->ksnp_id = id; + peer_ni->ksnp_id = *id; refcount_set(&peer_ni->ksnp_refcount, 1); /* 1 ref for caller */ peer_ni->ksnp_closing = 0; peer_ni->ksnp_accepting = 0; @@ -197,7 +197,7 @@ ksocknal_destroy_peer(struct ksock_peer_ni *peer_ni) struct ksock_net *net = peer_ni->ksnp_ni->ni_data; CDEBUG (D_NET, "peer_ni %s %p deleted\n", - libcfs_id2str(peer_ni->ksnp_id), peer_ni); + libcfs_idstr(&peer_ni->ksnp_id), peer_ni); LASSERT(refcount_read(&peer_ni->ksnp_refcount) == 0); LASSERT(peer_ni->ksnp_accepting == 0); @@ -218,23 +218,24 @@ ksocknal_destroy_peer(struct ksock_peer_ni *peer_ni) } struct ksock_peer_ni * -ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id) +ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_processid *id) { struct ksock_peer_ni *peer_ni; + unsigned long hash = nidhash(&id->nid); hash_for_each_possible(ksocknal_data.ksnd_peers, peer_ni, - ksnp_list, id.nid) { + ksnp_list, hash) { LASSERT(!peer_ni->ksnp_closing); if (peer_ni->ksnp_ni != ni) continue; - if (peer_ni->ksnp_id.nid != id.nid || - peer_ni->ksnp_id.pid != id.pid) + if (!nid_same(&peer_ni->ksnp_id.nid, &id->nid) || + peer_ni->ksnp_id.pid != id->pid) continue; CDEBUG(D_NET, "got peer_ni [%p] -> %s (%d)\n", - peer_ni, libcfs_id2str(id), + peer_ni, libcfs_idstr(id), refcount_read(&peer_ni->ksnp_refcount)); return peer_ni; } @@ -242,7 +243,7 @@ ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id) } struct ksock_peer_ni * -ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id) +ksocknal_find_peer(struct lnet_ni *ni, struct lnet_processid *id) { struct ksock_peer_ni *peer_ni; @@ -252,7 +253,7 @@ ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id) ksocknal_peer_addref(peer_ni); read_unlock(&ksocknal_data.ksnd_global_lock); - return (peer_ni); + return peer_ni; } static void @@ -311,7 +312,8 @@ ksocknal_get_peer_info(struct lnet_ni *ni, int index, if (index-- > 0) continue; - *id = peer_ni->ksnp_id; + id->pid = peer_ni->ksnp_id.pid; + id->nid = lnet_nid_to_nid4(&peer_ni->ksnp_id.nid); *myip = 0; *peer_ip = 0; *port = 0; @@ -325,7 +327,8 @@ ksocknal_get_peer_info(struct lnet_ni *ni, int index, if (index-- > 0) continue; - *id = peer_ni->ksnp_id; + id->pid = peer_ni->ksnp_id.pid; + id->nid = lnet_nid_to_nid4(&peer_ni->ksnp_id.nid); *myip = peer_ni->ksnp_passive_ips[j]; *peer_ip = 0; *port = 0; @@ -341,7 +344,8 @@ ksocknal_get_peer_info(struct lnet_ni *ni, int index, conn_cb = peer_ni->ksnp_conn_cb; - *id = peer_ni->ksnp_id; + id->pid = peer_ni->ksnp_id.pid; + id->nid = lnet_nid_to_nid4(&peer_ni->ksnp_id.nid); if (conn_cb->ksnr_addr.ss_family == AF_INET) { struct sockaddr_in *sa = (void *)&conn_cb->ksnr_addr; @@ -418,7 +422,9 @@ ksocknal_incr_conn_count(struct ksock_conn_cb *conn_cb, switch (type) { case SOCKLND_CONN_CONTROL: conn_cb->ksnr_ctrl_conn_count++; - /* there's a single control connection per peer */ + /* there's a single control connection per peer, + * two in case of loopback + */ conn_cb->ksnr_connected |= BIT(type); break; case SOCKLND_CONN_BULK_IN: @@ -444,6 +450,46 @@ ksocknal_incr_conn_count(struct ksock_conn_cb *conn_cb, type, conn_cb->ksnr_connected, conn_cb->ksnr_max_conns); } + +static void +ksocknal_decr_conn_count(struct ksock_conn_cb *conn_cb, + int type) +{ + conn_cb->ksnr_conn_count--; + + /* check if all connections of the given type got created */ + switch (type) { + case SOCKLND_CONN_CONTROL: + conn_cb->ksnr_ctrl_conn_count--; + /* there's a single control connection per peer, + * two in case of loopback + */ + if (conn_cb->ksnr_ctrl_conn_count == 0) + conn_cb->ksnr_connected &= ~BIT(type); + break; + case SOCKLND_CONN_BULK_IN: + conn_cb->ksnr_blki_conn_count--; + if (conn_cb->ksnr_blki_conn_count < conn_cb->ksnr_max_conns) + conn_cb->ksnr_connected &= ~BIT(type); + break; + case SOCKLND_CONN_BULK_OUT: + conn_cb->ksnr_blko_conn_count--; + if (conn_cb->ksnr_blko_conn_count < conn_cb->ksnr_max_conns) + conn_cb->ksnr_connected &= ~BIT(type); + break; + case SOCKLND_CONN_ANY: + if (conn_cb->ksnr_conn_count < conn_cb->ksnr_max_conns) + conn_cb->ksnr_connected &= ~BIT(type); + break; + default: + LBUG(); + break; + } + + CDEBUG(D_NET, "Del conn type %d, ksnr_connected %x ksnr_max_conns %d\n", + type, conn_cb->ksnr_connected, conn_cb->ksnr_max_conns); +} + static void ksocknal_associate_cb_conn_locked(struct ksock_conn_cb *conn_cb, struct ksock_conn *conn) @@ -462,13 +508,13 @@ ksocknal_associate_cb_conn_locked(struct ksock_conn_cb *conn_cb, if (conn_cb->ksnr_myiface < 0) { /* route wasn't bound locally yet (the initial route) */ CDEBUG(D_NET, "Binding %s %pIS to interface %d\n", - libcfs_id2str(peer_ni->ksnp_id), + libcfs_idstr(&peer_ni->ksnp_id), &conn_cb->ksnr_addr, conn_iface); } else { CDEBUG(D_NET, "Rebinding %s %pIS from interface %d to %d\n", - libcfs_id2str(peer_ni->ksnp_id), + libcfs_idstr(&peer_ni->ksnp_id), &conn_cb->ksnr_addr, conn_cb->ksnr_myiface, conn_iface); @@ -564,19 +610,23 @@ ksocknal_del_conn_cb_locked(struct ksock_conn_cb *conn_cb) } int -ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, +ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id4, struct sockaddr *addr) { struct ksock_peer_ni *peer_ni; struct ksock_peer_ni *peer2; struct ksock_conn_cb *conn_cb; + struct lnet_processid id; - if (id.nid == LNET_NID_ANY || - id.pid == LNET_PID_ANY) + if (id4.nid == LNET_NID_ANY || + id4.pid == LNET_PID_ANY) return (-EINVAL); + id.pid = id4.pid; + lnet_nid4_to_nid(id4.nid, &id.nid); + /* Have a brand new peer_ni ready... */ - peer_ni = ksocknal_create_peer(ni, id); + peer_ni = ksocknal_create_peer(ni, &id); if (IS_ERR(peer_ni)) return PTR_ERR(peer_ni); @@ -592,13 +642,14 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, LASSERT(atomic_read(&((struct ksock_net *)ni->ni_data)->ksnn_npeers) >= 0); - peer2 = ksocknal_find_peer_locked(ni, id); + peer2 = ksocknal_find_peer_locked(ni, &id); if (peer2 != NULL) { ksocknal_peer_decref(peer_ni); peer_ni = peer2; } else { /* peer_ni table takes my ref on peer_ni */ - hash_add(ksocknal_data.ksnd_peers, &peer_ni->ksnp_list, id.nid); + hash_add(ksocknal_data.ksnd_peers, &peer_ni->ksnp_list, + nidhash(&id.nid)); } ksocknal_add_conn_cb_locked(peer_ni, conn_cb); @@ -639,7 +690,7 @@ ksocknal_del_peer_locked(struct ksock_peer_ni *peer_ni, __u32 ip) } static int -ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip) +ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id4, __u32 ip) { LIST_HEAD(zombies); struct hlist_node *pnxt; @@ -648,11 +699,16 @@ ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip) int hi; int i; int rc = -ENOENT; + struct lnet_processid id; + + id.pid = id4.pid; + lnet_nid4_to_nid(id4.nid, &id.nid); write_lock_bh(&ksocknal_data.ksnd_global_lock); - if (id.nid != LNET_NID_ANY) { - lo = hash_min(id.nid, HASH_BITS(ksocknal_data.ksnd_peers)); + if (!LNET_NID_IS_ANY(&id.nid)) { + lo = hash_min(nidhash(&id.nid), + HASH_BITS(ksocknal_data.ksnd_peers)); hi = lo; } else { lo = 0; @@ -666,8 +722,8 @@ ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip) if (peer_ni->ksnp_ni != ni) continue; - if (!((id.nid == LNET_NID_ANY || - peer_ni->ksnp_id.nid == id.nid) && + if (!((LNET_NID_IS_ANY(&id.nid) || + nid_same(&peer_ni->ksnp_id.nid, &id.nid)) && (id.pid == LNET_PID_ANY || peer_ni->ksnp_id.pid == id.pid))) continue; @@ -797,7 +853,7 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, { rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; LIST_HEAD(zombies); - struct lnet_process_id peerid; + struct lnet_process_id peerid4; u64 incarnation; struct ksock_conn *conn; struct ksock_conn *conn2; @@ -867,7 +923,7 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, /* Active connection sends HELLO eagerly */ hello->kshm_nips = 0; - peerid = peer_ni->ksnp_id; + peerid4 = lnet_pid_to_pid4(&peer_ni->ksnp_id); write_lock_bh(global_lock); conn->ksnc_proto = peer_ni->ksnp_proto; @@ -883,32 +939,35 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, #endif } - rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); + rc = ksocknal_send_hello(ni, conn, peerid4.nid, hello); if (rc != 0) goto failed_1; } else { - peerid.nid = LNET_NID_ANY; - peerid.pid = LNET_PID_ANY; + peerid4.nid = LNET_NID_ANY; + peerid4.pid = LNET_PID_ANY; /* Passive, get protocol from peer_ni */ conn->ksnc_proto = NULL; } - rc = ksocknal_recv_hello(ni, conn, hello, &peerid, &incarnation); + rc = ksocknal_recv_hello(ni, conn, hello, &peerid4, &incarnation); if (rc < 0) goto failed_1; LASSERT(rc == 0 || active); LASSERT(conn->ksnc_proto != NULL); - LASSERT(peerid.nid != LNET_NID_ANY); + LASSERT(peerid4.nid != LNET_NID_ANY); - cpt = lnet_cpt_of_nid(peerid.nid, ni); + cpt = lnet_cpt_of_nid(peerid4.nid, ni); if (active) { ksocknal_peer_addref(peer_ni); write_lock_bh(global_lock); } else { - peer_ni = ksocknal_create_peer(ni, peerid); + struct lnet_processid peerid; + + lnet_pid4_to_pid(peerid4, &peerid); + peer_ni = ksocknal_create_peer(ni, &peerid); if (IS_ERR(peer_ni)) { rc = PTR_ERR(peer_ni); goto failed_1; @@ -919,12 +978,12 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, /* called with a ref on ni, so shutdown can't have started */ LASSERT(atomic_read(&((struct ksock_net *)ni->ni_data)->ksnn_npeers) >= 0); - peer2 = ksocknal_find_peer_locked(ni, peerid); + peer2 = ksocknal_find_peer_locked(ni, &peerid); if (peer2 == NULL) { /* NB this puts an "empty" peer_ni in the peer_ni * table (which takes my ref) */ hash_add(ksocknal_data.ksnd_peers, - &peer_ni->ksnp_list, peerid.nid); + &peer_ni->ksnp_list, nidhash(&peerid.nid)); } else { ksocknal_peer_decref(peer_ni); peer_ni = peer2; @@ -937,7 +996,7 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, /* Am I already connecting to this guy? Resolve in * favour of higher NID... */ - if (peerid.nid < ni->ni_nid && + if (peerid4.nid < lnet_nid_to_nid4(&ni->ni_nid) && ksocknal_connecting(peer_ni->ksnp_conn_cb, ((struct sockaddr *) &conn->ksnc_peeraddr))) { rc = EALREADY; @@ -1034,7 +1093,7 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, !rpc_cmp_addr((struct sockaddr *)&conn_cb->ksnr_addr, (struct sockaddr *)&conn->ksnc_peeraddr)) { CERROR("Route %s %pIS connected to %pIS\n", - libcfs_id2str(peer_ni->ksnp_id), + libcfs_idstr(&peer_ni->ksnp_id), &conn_cb->ksnr_addr, &conn->ksnc_peeraddr); } @@ -1104,13 +1163,13 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, CDEBUG(D_NET, "New conn %s p %d.x %pIS -> %pISp" " incarnation:%lld sched[%d]\n", - libcfs_id2str(peerid), conn->ksnc_proto->pro_version, + libcfs_id2str(peerid4), conn->ksnc_proto->pro_version, &conn->ksnc_myaddr, &conn->ksnc_peeraddr, incarnation, cpt); if (!active) { hello->kshm_nips = 0; - rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); + rc = ksocknal_send_hello(ni, conn, peerid4.nid, hello); } LIBCFS_FREE(hello, offsetof(struct ksock_hello_msg, @@ -1167,10 +1226,10 @@ failed_2: if (warn != NULL) { if (rc < 0) CERROR("Not creating conn %s type %d: %s\n", - libcfs_id2str(peerid), conn->ksnc_type, warn); + libcfs_id2str(peerid4), conn->ksnc_type, warn); else CDEBUG(D_NET, "Not creating conn %s type %d: %s\n", - libcfs_id2str(peerid), conn->ksnc_type, warn); + libcfs_id2str(peerid4), conn->ksnc_type, warn); } if (!active) { @@ -1180,7 +1239,7 @@ failed_2: */ conn->ksnc_type = SOCKLND_CONN_NONE; hello->kshm_nips = 0; - ksocknal_send_hello(ni, conn, peerid.nid, hello); + ksocknal_send_hello(ni, conn, peerid4.nid, hello); } write_lock_bh(global_lock); @@ -1219,6 +1278,8 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error) struct ksock_peer_ni *peer_ni = conn->ksnc_peer; struct ksock_conn_cb *conn_cb; struct ksock_conn *conn2; + int conn_count; + int duplicate_count = 0; LASSERT(peer_ni->ksnp_error == 0); LASSERT(!conn->ksnc_closing); @@ -1232,21 +1293,29 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error) /* dissociate conn from cb... */ LASSERT(!conn_cb->ksnr_deleted); + conn_count = ksocknal_get_conn_count_by_type(conn_cb, + conn->ksnc_type); /* connected bit is set only if all connections * of the given type got created */ - if (ksocknal_get_conn_count_by_type(conn_cb, conn->ksnc_type) == - conn_cb->ksnr_max_conns) + if (conn_count == conn_cb->ksnr_max_conns) LASSERT((conn_cb->ksnr_connected & BIT(conn->ksnc_type)) != 0); - list_for_each_entry(conn2, &peer_ni->ksnp_conns, ksnc_list) { - if (conn2->ksnc_conn_cb == conn_cb && - conn2->ksnc_type == conn->ksnc_type) - goto conn2_found; + if (conn_count == 1) { + list_for_each_entry(conn2, &peer_ni->ksnp_conns, + ksnc_list) { + if (conn2->ksnc_conn_cb == conn_cb && + conn2->ksnc_type == conn->ksnc_type) + duplicate_count += 1; + } + if (duplicate_count > 0) + CERROR("Found %d duplicate conns type %d\n", + duplicate_count, + conn->ksnc_type); } - conn_cb->ksnr_connected &= ~BIT(conn->ksnc_type); -conn2_found: + ksocknal_decr_conn_count(conn_cb, conn->ksnc_type); + conn->ksnc_conn_cb = NULL; /* drop conn's ref on conn_cb */ @@ -1317,7 +1386,8 @@ ksocknal_peer_failed(struct ksock_peer_ni *peer_ni) read_unlock(&ksocknal_data.ksnd_global_lock); if (notify) - lnet_notify(peer_ni->ksnp_ni, peer_ni->ksnp_id.nid, + lnet_notify(peer_ni->ksnp_ni, + lnet_nid_to_nid4(&peer_ni->ksnp_id.nid), false, false, last_alive); } @@ -1455,9 +1525,10 @@ ksocknal_destroy_conn(struct ksock_conn *conn) last_rcv = conn->ksnc_rx_deadline - ksocknal_timeout(); CERROR("Completing partial receive from %s[%d], ip %pISp, with error, wanted: %d, left: %d, last alive is %lld secs ago\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type, + libcfs_idstr(&conn->ksnc_peer->ksnp_id), + conn->ksnc_type, &conn->ksnc_peeraddr, - conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left, + conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left, ktime_get_seconds() - last_rcv); if (conn->ksnc_lnet_msg) conn->ksnc_lnet_msg->msg_health_status = @@ -1467,31 +1538,31 @@ ksocknal_destroy_conn(struct ksock_conn *conn) case SOCKNAL_RX_LNET_HEADER: if (conn->ksnc_rx_started) CERROR("Incomplete receive of lnet header from %s, ip %pISp, with error, protocol: %d.x.\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), + libcfs_idstr(&conn->ksnc_peer->ksnp_id), &conn->ksnc_peeraddr, conn->ksnc_proto->pro_version); break; - case SOCKNAL_RX_KSM_HEADER: - if (conn->ksnc_rx_started) + case SOCKNAL_RX_KSM_HEADER: + if (conn->ksnc_rx_started) CERROR("Incomplete receive of ksock message from %s, ip %pISp, with error, protocol: %d.x.\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), + libcfs_idstr(&conn->ksnc_peer->ksnp_id), &conn->ksnc_peeraddr, conn->ksnc_proto->pro_version); - break; - case SOCKNAL_RX_SLOP: - if (conn->ksnc_rx_started) + break; + case SOCKNAL_RX_SLOP: + if (conn->ksnc_rx_started) CERROR("Incomplete receive of slops from %s, ip %pISp, with error\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), + libcfs_idstr(&conn->ksnc_peer->ksnp_id), &conn->ksnc_peeraddr); - break; - default: - LBUG (); - break; - } + break; + default: + LBUG(); + break; + } - ksocknal_peer_decref(conn->ksnc_peer); + ksocknal_peer_decref(conn->ksnc_peer); - LIBCFS_FREE (conn, sizeof (*conn)); + LIBCFS_FREE(conn, sizeof(*conn)); } int @@ -1531,7 +1602,7 @@ ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why) } int -ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr) +ksocknal_close_matching_conns(struct lnet_processid *id, __u32 ipaddr) { struct ksock_peer_ni *peer_ni; struct hlist_node *pnxt; @@ -1543,8 +1614,9 @@ ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr) write_lock_bh(&ksocknal_data.ksnd_global_lock); - if (id.nid != LNET_NID_ANY) { - lo = hash_min(id.nid, HASH_BITS(ksocknal_data.ksnd_peers)); + if (!LNET_NID_IS_ANY(&id->nid)) { + lo = hash_min(nidhash(&id->nid), + HASH_BITS(ksocknal_data.ksnd_peers)); hi = lo; } else { lo = 0; @@ -1557,10 +1629,10 @@ ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr) &ksocknal_data.ksnd_peers[i], ksnp_list) { - if (!((id.nid == LNET_NID_ANY || - id.nid == peer_ni->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || - id.pid == peer_ni->ksnp_id.pid))) + if (!((LNET_NID_IS_ANY(&id->nid) || + nid_same(&id->nid, &peer_ni->ksnp_id.nid)) && + (id->pid == LNET_PID_ANY || + id->pid == peer_ni->ksnp_id.pid))) continue; count += ksocknal_close_peer_conns_locked( @@ -1572,7 +1644,8 @@ ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr) write_unlock_bh(&ksocknal_data.ksnd_global_lock); /* wildcards always succeed */ - if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0) + if (LNET_NID_IS_ANY(&id->nid) || id->pid == LNET_PID_ANY || + ipaddr == 0) return 0; return (count == 0 ? -ENOENT : 0); @@ -1584,15 +1657,15 @@ ksocknal_notify_gw_down(lnet_nid_t gw_nid) /* The router is telling me she's been notified of a change in * gateway state.... */ - struct lnet_process_id id = { - .nid = gw_nid, + struct lnet_processid id = { .pid = LNET_PID_ANY, }; CDEBUG(D_NET, "gw %s down\n", libcfs_nid2str(gw_nid)); + lnet_nid4_to_nid(gw_nid, &id.nid); /* If the gateway crashed, close all open connections... */ - ksocknal_close_matching_conns(id, 0); + ksocknal_close_matching_conns(&id, 0); return; /* We can only establish new connections @@ -1630,15 +1703,16 @@ ksocknal_push_peer(struct ksock_peer_ni *peer_ni) } static int -ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id) +ksocknal_push(struct lnet_ni *ni, struct lnet_processid *id) { int lo; int hi; int bkt; int rc = -ENOENT; - if (id.nid != LNET_NID_ANY) { - lo = hash_min(id.nid, HASH_BITS(ksocknal_data.ksnd_peers)); + if (!LNET_NID_IS_ANY(&id->nid)) { + lo = hash_min(nidhash(&id->nid), + HASH_BITS(ksocknal_data.ksnd_peers)); hi = lo; } else { lo = 0; @@ -1656,10 +1730,11 @@ ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id) hlist_for_each_entry(peer_ni, &ksocknal_data.ksnd_peers[bkt], ksnp_list) { - if (!((id.nid == LNET_NID_ANY || - id.nid == peer_ni->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || - id.pid == peer_ni->ksnp_id.pid))) + if (!((LNET_NID_IS_ANY(&id->nid) || + nid_same(&id->nid, + &peer_ni->ksnp_id.nid)) && + (id->pid == LNET_PID_ANY || + id->pid == peer_ni->ksnp_id.pid))) continue; if (i++ == peer_off) { @@ -1683,7 +1758,8 @@ ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id) int ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) { - struct lnet_process_id id = {0}; + struct lnet_process_id id4 = {}; + struct lnet_processid id = {}; struct libcfs_ioctl_data *data = arg; int rc; @@ -1715,43 +1791,43 @@ ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) return rc; } - case IOC_LIBCFS_GET_PEER: { - __u32 myip = 0; - __u32 ip = 0; - int port = 0; - int conn_count = 0; - int share_count = 0; + case IOC_LIBCFS_GET_PEER: { + __u32 myip = 0; + __u32 ip = 0; + int port = 0; + int conn_count = 0; + int share_count = 0; - rc = ksocknal_get_peer_info(ni, data->ioc_count, - &id, &myip, &ip, &port, - &conn_count, &share_count); - if (rc != 0) - return rc; + rc = ksocknal_get_peer_info(ni, data->ioc_count, + &id4, &myip, &ip, &port, + &conn_count, &share_count); + if (rc != 0) + return rc; - data->ioc_nid = id.nid; - data->ioc_count = share_count; - data->ioc_u32[0] = ip; - data->ioc_u32[1] = port; - data->ioc_u32[2] = myip; - data->ioc_u32[3] = conn_count; - data->ioc_u32[4] = id.pid; - return 0; - } + data->ioc_nid = id4.nid; + data->ioc_count = share_count; + data->ioc_u32[0] = ip; + data->ioc_u32[1] = port; + data->ioc_u32[2] = myip; + data->ioc_u32[3] = conn_count; + data->ioc_u32[4] = id4.pid; + return 0; + } case IOC_LIBCFS_ADD_PEER: { struct sockaddr_in sa = {.sin_family = AF_INET}; - id.nid = data->ioc_nid; - id.pid = LNET_PID_LUSTRE; + id4.nid = data->ioc_nid; + id4.pid = LNET_PID_LUSTRE; sa.sin_addr.s_addr = htonl(data->ioc_u32[0]); sa.sin_port = htons(data->ioc_u32[1]); - return ksocknal_add_peer(ni, id, (struct sockaddr *)&sa); + return ksocknal_add_peer(ni, id4, (struct sockaddr *)&sa); } - case IOC_LIBCFS_DEL_PEER: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_del_peer (ni, id, - data->ioc_u32[0]); /* IP */ + case IOC_LIBCFS_DEL_PEER: + id4.nid = data->ioc_nid; + id4.pid = LNET_PID_ANY; + return ksocknal_del_peer(ni, id4, + data->ioc_u32[0]); /* IP */ case IOC_LIBCFS_GET_CONN: { int txmem; @@ -1766,9 +1842,9 @@ ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle); - data->ioc_count = txmem; - data->ioc_nid = conn->ksnc_peer->ksnp_id.nid; - data->ioc_flags = nagle; + data->ioc_count = txmem; + data->ioc_nid = lnet_nid_to_nid4(&conn->ksnc_peer->ksnp_id.nid); + data->ioc_flags = nagle; if (psa->sin_family == AF_INET) data->ioc_u32[0] = ntohl(psa->sin_addr.s_addr); else @@ -1787,31 +1863,32 @@ ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) return 0; } - case IOC_LIBCFS_CLOSE_CONNECTION: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_close_matching_conns (id, - data->ioc_u32[0]); - - case IOC_LIBCFS_REGISTER_MYNID: - /* Ignore if this is a noop */ - if (data->ioc_nid == ni->ni_nid) - return 0; - - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - return -EINVAL; - - case IOC_LIBCFS_PUSH_CONNECTION: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_push(ni, id); - - default: - return -EINVAL; - } - /* not reached */ + case IOC_LIBCFS_CLOSE_CONNECTION: + lnet_nid4_to_nid(data->ioc_nid, &id.nid); + id.pid = LNET_PID_ANY; + return ksocknal_close_matching_conns(&id, + data->ioc_u32[0]); + + case IOC_LIBCFS_REGISTER_MYNID: + /* Ignore if this is a noop */ + if (nid_is_nid4(&ni->ni_nid) && + data->ioc_nid == lnet_nid_to_nid4(&ni->ni_nid)) + return 0; + + CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", + libcfs_nid2str(data->ioc_nid), + libcfs_nidstr(&ni->ni_nid)); + return -EINVAL; + + case IOC_LIBCFS_PUSH_CONNECTION: + lnet_nid4_to_nid(data->ioc_nid, &id.nid); + id.pid = LNET_PID_ANY; + return ksocknal_push(ni, &id); + + default: + return -EINVAL; + } + /* not reached */ } static void @@ -1847,11 +1924,15 @@ static int ksocknal_get_link_status(struct net_device *dev) LASSERT(dev); - if (!netif_running(dev)) + if (!netif_running(dev)) { ret = 0; + CDEBUG(D_NET, "device not running\n"); + } /* Some devices may not be providing link settings */ - else if (dev->ethtool_ops->get_link) + else if (dev->ethtool_ops->get_link) { ret = dev->ethtool_ops->get_link(dev); + CDEBUG(D_NET, "get_link returns %u\n", ret); + } return ret; } @@ -1860,11 +1941,16 @@ static int ksocknal_handle_link_state_change(struct net_device *dev, unsigned char operstate) { - struct lnet_ni *ni; + struct lnet_ni *ni = NULL; struct ksock_net *net; struct ksock_net *cnxt; int ifindex; unsigned char link_down = !(operstate == IF_OPER_UP); + struct in_device *in_dev; + bool found_ip = false; + struct ksock_interface *ksi = NULL; + struct sockaddr_in *sa; + DECLARE_CONST_IN_IFADDR(ifa); ifindex = dev->ifindex; @@ -1873,20 +1959,92 @@ ksocknal_handle_link_state_change(struct net_device *dev, list_for_each_entry_safe(net, cnxt, &ksocknal_data.ksnd_nets, ksnn_list) { - if (net->ksnn_interface.ksni_index != ifindex) + + ksi = &net->ksnn_interface; + sa = (void *)&ksi->ksni_addr; + found_ip = false; + + if (ksi->ksni_index != ifindex || + strcmp(ksi->ksni_name, dev->name)) continue; + ni = net->ksnn_ni; - if (link_down) + + in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) { + CDEBUG(D_NET, "Interface %s has no IPv4 status.\n", + dev->name); + CDEBUG(D_NET, "set link fatal state to 1\n"); + atomic_set(&ni->ni_fatal_error_on, 1); + continue; + } + in_dev_for_each_ifa_rtnl(ifa, in_dev) { + if (sa->sin_addr.s_addr == ifa->ifa_local) + found_ip = true; + } + endfor_ifa(in_dev); + + if (!found_ip) { + CDEBUG(D_NET, "Interface %s has no matching ip\n", + dev->name); + CDEBUG(D_NET, "set link fatal state to 1\n"); + atomic_set(&ni->ni_fatal_error_on, 1); + continue; + } + + if (link_down) { + CDEBUG(D_NET, "set link fatal state to 1\n"); atomic_set(&ni->ni_fatal_error_on, link_down); - else + } else { + CDEBUG(D_NET, "set link fatal state to %u\n", + (ksocknal_get_link_status(dev) == 0)); atomic_set(&ni->ni_fatal_error_on, (ksocknal_get_link_status(dev) == 0)); + } } out: return 0; } +static int +ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) +{ + struct lnet_ni *ni; + struct ksock_net *net; + struct ksock_net *cnxt; + struct net_device *event_netdev = ifa->ifa_dev->dev; + int ifindex; + struct ksock_interface *ksi = NULL; + struct sockaddr_in *sa; + + if (!ksocknal_data.ksnd_nnets) + goto out; + + ifindex = event_netdev->ifindex; + + list_for_each_entry_safe(net, cnxt, &ksocknal_data.ksnd_nets, + ksnn_list) { + + ksi = &net->ksnn_interface; + sa = (void *)&ksi->ksni_addr; + + if (ksi->ksni_index != ifindex || + strcmp(ksi->ksni_name, event_netdev->name)) + continue; + + if (sa->sin_addr.s_addr == ifa->ifa_local) { + CDEBUG(D_NET, "set link fatal state to %u\n", + (event == NETDEV_DOWN)); + ni = net->ksnn_ni; + atomic_set(&ni->ni_fatal_error_on, + (event == NETDEV_DOWN)); + } + } +out: + return 0; +} + /************************************ * Net device notifier event handler ************************************/ @@ -1898,6 +2056,9 @@ static int ksocknal_device_event(struct notifier_block *unused, operstate = dev->operstate; + CDEBUG(D_NET, "devevent: status=%ld, iface=%s ifindex %d state %u\n", + event, dev->name, dev->ifindex, operstate); + switch (event) { case NETDEV_UP: case NETDEV_DOWN: @@ -1909,10 +2070,36 @@ static int ksocknal_device_event(struct notifier_block *unused, return NOTIFY_OK; } -static struct notifier_block ksocknal_notifier_block = { +/************************************ + * Inetaddr notifier event handler + ************************************/ +static int ksocknal_inetaddr_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = ptr; + + CDEBUG(D_NET, "addrevent: status %ld ip addr %pI4, netmask %pI4.\n", + event, &ifa->ifa_address, &ifa->ifa_mask); + + switch (event) { + case NETDEV_UP: + case NETDEV_DOWN: + case NETDEV_CHANGE: + ksocknal_handle_inetaddr_change(ifa, event); + break; + + } + return NOTIFY_OK; +} + +static struct notifier_block ksocknal_dev_notifier_block = { .notifier_call = ksocknal_device_event, }; +static struct notifier_block ksocknal_inetaddr_notifier_block = { + .notifier_call = ksocknal_inetaddr_event, +}; + static void ksocknal_base_shutdown(void) { @@ -1924,8 +2111,10 @@ ksocknal_base_shutdown(void) libcfs_kmem_read()); LASSERT (ksocknal_data.ksnd_nnets == 0); - if (ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL) - unregister_netdevice_notifier(&ksocknal_notifier_block); + if (ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL) { + unregister_netdevice_notifier(&ksocknal_dev_notifier_block); + unregister_inetaddr_notifier(&ksocknal_inetaddr_notifier_block); + } switch (ksocknal_data.ksnd_init) { default: @@ -2091,7 +2280,8 @@ ksocknal_base_startup(void) goto failed; } - register_netdevice_notifier(&ksocknal_notifier_block); + register_netdevice_notifier(&ksocknal_dev_notifier_block); + register_inetaddr_notifier(&ksocknal_inetaddr_notifier_block); /* flag everything initialised */ ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; @@ -2118,9 +2308,8 @@ ksocknal_debug_peerhash(struct lnet_ni *ni) if (peer_ni->ksnp_ni != ni) continue; - CWARN("Active peer_ni on shutdown: %s, ref %d, " - "closing %d, accepting %d, err %d, zcookie %llu, " - "txq %d, zc_req %d\n", libcfs_id2str(peer_ni->ksnp_id), + CWARN("Active peer_ni on shutdown: %s, ref %d, closing %d, accepting %d, err %d, zcookie %llu, txq %d, zc_req %d\n", + libcfs_idstr(&peer_ni->ksnp_id), refcount_read(&peer_ni->ksnp_refcount), peer_ni->ksnp_closing, peer_ni->ksnp_accepting, peer_ni->ksnp_error, @@ -2348,10 +2537,8 @@ ksocknal_startup(struct lnet_ni *ni) LASSERT(ksi); LASSERT(ksi->ksni_addr.ss_family == AF_INET); - ni->ni_nid = LNET_MKNID( - LNET_NIDNET(ni->ni_nid), - ntohl(((struct sockaddr_in *) - &ksi->ksni_addr)->sin_addr.s_addr)); + ni->ni_nid.nid_addr[0] = + ((struct sockaddr_in *)&ksi->ksni_addr)->sin_addr.s_addr; list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets); net->ksnn_ni = ni; ksocknal_data.ksnd_nnets++; @@ -2367,7 +2554,6 @@ fail_0: return -ENETDOWN; } - static void __exit ksocklnd_exit(void) { lnet_unregister_lnd(&the_ksocklnd);