From: eeb Date: Fri, 3 Jun 2005 09:36:56 +0000 (+0000) Subject: * Fixed 5858: vibnal arp retries X-Git-Tag: 1.4.10~1038 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=ec1562f6673fd57f30f1569da1753c5a0020b8ea;p=fs%2Flustre-release.git * Fixed 5858: vibnal arp retries --- diff --git a/lnet/klnds/viblnd/viblnd.h b/lnet/klnds/viblnd/viblnd.h index 59fa4c3..e3720b6 100644 --- a/lnet/klnds/viblnd/viblnd.h +++ b/lnet/klnds/viblnd/viblnd.h @@ -126,6 +126,8 @@ #define IBNAL_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */ #define IBNAL_CREDIT_HIGHWATER 7 /* when to eagerly return credits */ +#define IBNAL_ARP_RETRIES 3 /* How many times to retry ARP */ + #define IBNAL_NTX 64 /* # tx descs */ #define IBNAL_NTX_NBLK 128 /* # reserved tx descs */ /* reduced from 256 to ensure we register < 255 pages per region. @@ -401,6 +403,7 @@ typedef struct kib_peer struct list_head ibp_conns; /* all active connections */ struct list_head ibp_tx_queue; /* msgs waiting for a conn */ int ibp_connecting; /* connecting+accepting */ + int ibp_arp_count; /* # arp attempts */ unsigned long ibp_reconnect_time; /* when reconnect may be attempted */ unsigned long ibp_reconnect_interval; /* exponential backoff */ } kib_peer_t; diff --git a/lnet/klnds/viblnd/viblnd_cb.c b/lnet/klnds/viblnd/viblnd_cb.c index 8a9c8a4..b3a8dd4 100644 --- a/lnet/klnds/viblnd/viblnd_cb.c +++ b/lnet/klnds/viblnd/viblnd_cb.c @@ -1219,6 +1219,24 @@ kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn) } void +kibnal_schedule_peer_arp (kib_peer_t *peer) +{ + unsigned long flags; + + LASSERT (peer->ibp_connecting != 0); + LASSERT (peer->ibp_arp_count > 0); + + kibnal_peer_addref(peer); /* extra ref for connd */ + + spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); + + list_add_tail (&peer->ibp_connd_list, &kibnal_data.kib_connd_peers); + wake_up (&kibnal_data.kib_connd_waitq); + + spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); +} + +void kibnal_launch_tx (kib_tx_t *tx, ptl_nid_t nid) { kib_peer_t *peer; @@ -1287,15 +1305,8 @@ kibnal_launch_tx (kib_tx_t *tx, ptl_nid_t nid) } peer->ibp_connecting = 1; - kibnal_peer_addref(peer); /* extra ref for connd */ - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - list_add_tail (&peer->ibp_connd_list, - &kibnal_data.kib_connd_peers); - wake_up (&kibnal_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); + peer->ibp_arp_count = 1 + IBNAL_ARP_RETRIES; + kibnal_schedule_peer_arp(peer); } /* A connection is being established; queue the message... */ @@ -1709,6 +1720,21 @@ kibnal_thread_fini (void) } void +kibnal_schedule_conn (kib_conn_t *conn) +{ + unsigned long flags; + + kibnal_conn_addref(conn); /* ++ref for connd */ + + spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); + + list_add_tail (&conn->ibc_list, &kibnal_data.kib_connd_conns); + wake_up (&kibnal_data.kib_connd_waitq); + + spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); +} + +void kibnal_close_conn_locked (kib_conn_t *conn, int error) { /* This just does the immmediate housekeeping. 'error' is zero for a @@ -1727,9 +1753,10 @@ kibnal_close_conn_locked (kib_conn_t *conn, int error) if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) return; /* already being handled */ - - spin_lock(&conn->ibc_lock); + /* NB Can't take ibc_lock here (could be in IRQ context), without + * risking deadlock, so access to ibc_{tx_queue,active_txs} is racey */ + if (error == 0 && list_empty(&conn->ibc_tx_queue) && list_empty(&conn->ibc_active_txs)) { @@ -1744,6 +1771,8 @@ kibnal_close_conn_locked (kib_conn_t *conn, int error) list_empty(&conn->ibc_active_txs) ? "" : "(waiting)", conn->ibc_txseq, conn->ibc_rxseq); +#if 0 + /* can't skip down the queue without holding ibc_lock (see above) */ list_for_each(tmp, &conn->ibc_tx_queue) { kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list); @@ -1763,11 +1792,9 @@ kibnal_close_conn_locked (kib_conn_t *conn, int error) tx->tx_sending, tx->tx_waiting, (long)(tx->tx_deadline - jiffies), HZ); } +#endif } - spin_unlock(&conn->ibc_lock); - - /* connd takes ibc_list's ref */ list_del (&conn->ibc_list); if (list_empty (&peer->ibp_conns) && /* no more conns */ @@ -1778,12 +1805,8 @@ kibnal_close_conn_locked (kib_conn_t *conn, int error) kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECT1); - spin_lock(&kibnal_data.kib_connd_lock); - - list_add_tail (&conn->ibc_list, &kibnal_data.kib_connd_conns); - wake_up (&kibnal_data.kib_connd_waitq); - - spin_unlock(&kibnal_data.kib_connd_lock); + kibnal_schedule_conn(conn); + kibnal_conn_decref(conn); /* lose ibc_list's ref */ } void @@ -1983,6 +2006,7 @@ kibnal_connreq_done(kib_conn_t *conn, int active, int status) switch (conn->ibc_state) { default: LBUG(); + case IBNAL_CONN_ACTIVE_CHECK_REPLY: /* got a connection reply but failed checks */ LASSERT (active); @@ -2123,33 +2147,27 @@ kibnal_cm_callback(cm_cep_handle_t cep, cm_conn_data_t *cmdata, void *arg) case IBNAL_CONN_ACTIVE_RTU: /* kibnal_connreq_done is getting there; It'll see * ibc_disconnect set... */ - kibnal_conn_decref(conn); /* lose my ref */ break; case IBNAL_CONN_ESTABLISHED: /* kibnal_connreq_done got there already; get * disconnect going... */ kibnal_close_conn_locked(conn, 0); - kibnal_conn_decref(conn); /* lose my ref */ break; case IBNAL_CONN_DISCONNECT1: /* kibnal_terminate_conn is getting there; It'll see * ibc_disconnect set... */ - kibnal_conn_decref(conn); /* lose my ref */ break; case IBNAL_CONN_DISCONNECT2: /* kibnal_terminate_conn got there already; complete - * the disconnect. NB kib_connd_conns takes my ref */ - spin_lock(&kibnal_data.kib_connd_lock); - list_add_tail(&conn->ibc_list, &kibnal_data.kib_connd_conns); - wake_up(&kibnal_data.kib_connd_waitq); - spin_unlock(&kibnal_data.kib_connd_lock); + * the disconnect. */ + kibnal_schedule_conn(conn); break; } write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return; + break; case cm_event_disconn_timeout: case cm_event_disconn_reply: @@ -2158,12 +2176,8 @@ kibnal_cm_callback(cm_cep_handle_t cep, cm_conn_data_t *cmdata, void *arg) LASSERT (!conn->ibc_disconnect); conn->ibc_disconnect = 1; - /* kibnal_terminate_conn sent the disconnect request. - * NB kib_connd_conns takes my ref */ - spin_lock(&kibnal_data.kib_connd_lock); - list_add_tail(&conn->ibc_list, &kibnal_data.kib_connd_conns); - wake_up(&kibnal_data.kib_connd_waitq); - spin_unlock(&kibnal_data.kib_connd_lock); + /* kibnal_terminate_conn sent the disconnect request. */ + kibnal_schedule_conn(conn); write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); break; @@ -2173,13 +2187,12 @@ kibnal_cm_callback(cm_cep_handle_t cep, cm_conn_data_t *cmdata, void *arg) case cm_event_conn_reject: LASSERT (conn->ibc_state == IBNAL_CONN_PASSIVE_WAIT); conn->ibc_connvars->cv_conndata = *cmdata; - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - list_add_tail(&conn->ibc_list, &kibnal_data.kib_connd_conns); - wake_up(&kibnal_data.kib_connd_waitq); - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); + + kibnal_schedule_conn(conn); break; } + + kibnal_conn_decref(conn); /* lose my ref */ } void @@ -2430,11 +2443,8 @@ kibnal_active_connect_callback (cm_cep_handle_t cep, cm_conn_data_t *cd, LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_CONNECT); cv->cv_conndata = *cd; - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - /* connd takes my ref */ - list_add_tail(&conn->ibc_list, &kibnal_data.kib_connd_conns); - wake_up(&kibnal_data.kib_connd_waitq); - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); + kibnal_schedule_conn(conn); + kibnal_conn_decref(conn); } void @@ -2663,7 +2673,7 @@ kibnal_check_connreply (kib_conn_t *conn) } void -kibnal_send_connreq (kib_conn_t *conn) +kibnal_arp_done (kib_conn_t *conn) { kib_peer_t *peer = conn->ibc_peer; kib_connvars_t *cv = conn->ibc_connvars; @@ -2671,15 +2681,39 @@ kibnal_send_connreq (kib_conn_t *conn) ib_path_record_v2_t *path = &cv->cv_path; vv_return_t vvrc; int rc; + unsigned long flags; - /* Only called by connd => statics OK */ LASSERT (!in_interrupt()); LASSERT (current == kibnal_data.kib_connd); LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_ARP); + LASSERT (peer->ibp_arp_count > 0); if (cv->cv_arprc != ibat_stat_ok) { CERROR("Can't Arp "LPX64"@%u.%u.%u.%u: %d\n", peer->ibp_nid, HIPQUAD(peer->ibp_ip), cv->cv_arprc); + + write_lock_irqsave(&kibnal_data.kib_global_lock, flags); + peer->ibp_arp_count--; + if (peer->ibp_arp_count == 0) { + /* final ARP attempt failed */ + write_unlock_irqrestore(&kibnal_data.kib_global_lock, + flags); + CERROR("Arp "LPX64"@%u.%u.%u.%u failed: %d\n", + peer->ibp_nid, HIPQUAD(peer->ibp_ip), + cv->cv_arprc); + } else { + /* Retry ARP: ibp_connecting++ so terminating conn + * doesn't end peer's connection attempt */ + peer->ibp_connecting++; + write_unlock_irqrestore(&kibnal_data.kib_global_lock, + flags); + CWARN("Arp "LPX64"@%u.%u.%u.%u failed: %d " + "(%d attempts left)\n", + peer->ibp_nid, HIPQUAD(peer->ibp_ip), + cv->cv_arprc, peer->ibp_arp_count); + + kibnal_schedule_peer_arp(peer); + } kibnal_connreq_done(conn, 1, -ENETUNREACH); return; } @@ -2769,13 +2803,8 @@ kibnal_arp_callback (ibat_stat_t arprc, ibat_arp_data_t *arp_data, void *arg) if (arprc == ibat_stat_ok) conn->ibc_connvars->cv_arp = *arp_data; - /* connd takes over my ref on conn */ - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - list_add_tail(&conn->ibc_list, &kibnal_data.kib_connd_conns); - wake_up(&kibnal_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); + kibnal_schedule_conn(conn); + kibnal_conn_decref(conn); } void @@ -2788,6 +2817,7 @@ kibnal_arp_peer (kib_peer_t *peer) /* Only the connd does this (i.e. single threaded) */ LASSERT (current == kibnal_data.kib_connd); LASSERT (peer->ibp_connecting != 0); + LASSERT (peer->ibp_arp_count > 0); cep = cm_create_cep(cm_cep_transp_rc); if (cep == NULL) { @@ -2825,18 +2855,13 @@ kibnal_arp_peer (kib_peer_t *peer) break; case ibat_stat_ok: - /* Immediate return (ARP cache hit) == no callback. */ - conn->ibc_connvars->cv_arprc = ibat_stat_ok; - kibnal_send_connreq(conn); - kibnal_conn_decref(conn); - break; - case ibat_stat_error: case ibat_stat_timeout: case ibat_stat_not_found: - CERROR("Arp "LPX64"@%u.%u.%u.%u failed: %d\n", peer->ibp_nid, - HIPQUAD(peer->ibp_ip), ibatrc); - kibnal_connreq_done(conn, 1, -ENETUNREACH); + /* Immediate return (ARP cache hit or failure) == no callback. + * Do the next stage directly... */ + conn->ibc_connvars->cv_arprc = ibatrc; + kibnal_arp_done(conn); kibnal_conn_decref(conn); break; } @@ -3059,7 +3084,7 @@ kibnal_connd (void *arg) LBUG(); case IBNAL_CONN_ACTIVE_ARP: - kibnal_send_connreq(conn); + kibnal_arp_done(conn); break; case IBNAL_CONN_ACTIVE_CONNECT: