Whamcloud - gitweb
* socklnd: fixed my stupid blunder that could cause the assertion
author: eeb <eeb>
Tue, 25 Oct 2005 00:07:44 +0000 (00:07 +0000)
committer: eeb <eeb>
Tue, 25 Oct 2005 00:07:44 +0000 (00:07 +0000)
     failure...

     LustreError: 20480:0:(socklnd_cb.c:788:ksocknal_launch_packet())
     ASSERTION(peer->ksnp_accepting > 0 ||
               ksocknal_find_connecting_route_locked(peer) != NULL) failed

*    iiblnd: fixed connection race and tested on boston, but didn't manage
     to exercise the race resolution code.

lnet/klnds/iiblnd/iiblnd_cb.c
lnet/klnds/socklnd/socklnd_cb.c

index dc0fd4b..8f0da57 100644 (file)
@@ -2273,7 +2273,6 @@ kibnal_listen_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg)
         CM_REPLY_INFO    *rep;
         kib_conn_t       *conn;
         FSTATUS           frc;
-        int               reason;
         int               rc;
         
         LASSERT(arg == NULL); /* no conn yet for passive */
index dce1147..462ed7f 100644 (file)
@@ -784,13 +784,19 @@ ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id)
                 return (0);
         }
 
-        LASSERT (peer->ksnp_accepting > 0 ||
-                 ksocknal_find_connecting_route_locked (peer) != NULL);
-
-        /* Queue the message until a connection is established */
-        list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue);
+        if (peer->ksnp_accepting > 0 ||
+            ksocknal_find_connecting_route_locked (peer) != NULL) {
+                /* Queue the message until a connection is established */
+                list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue);
+                write_unlock_irqrestore (g_lock, flags);
+                return 0;
+        }
+        
         write_unlock_irqrestore (g_lock, flags);
-        return 0;
+
+        /* NB Routes may be ignored if connections to them failed recently */
+        CERROR("No usable routes to %s\n", libcfs_id2str(id));
+        return (-EHOSTUNREACH);
 }
 
 int