Whamcloud - gitweb
* Fixed qswnal peer death notification bug
author eeb <eeb>
Mon, 29 Sep 2003 10:45:41 +0000 (10:45 +0000)
committer eeb <eeb>
Mon, 29 Sep 2003 10:45:41 +0000 (10:45 +0000)
*  Fixed missing kfree in the router

*  Converted some socknal and router CERRORs into CWARN/CDEBUGs

lnet/klnds/qswlnd/qswlnd_cb.c
lnet/klnds/socklnd/socklnd_cb.c
lnet/router/router.c
lustre/portals/knals/qswnal/qswnal_cb.c
lustre/portals/knals/socknal/socknal_cb.c
lustre/portals/router/router.c

index 99f299f..7f8bc96 100644 (file)
@@ -119,6 +119,18 @@ kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
 }
 
 void
+kqswnal_notify_peer_down(kqswnal_tx_t *ktx)
+{
+        struct timeval     now;
+        time_t             then;
+
+        do_gettimeofday (&now);
+        then = now.tv_sec - (jiffies - ktx->ktx_launchtime)/HZ;
+
+        kpr_notify(&kqswnal_data.kqn_router, ktx->ktx_nid, 0, then);
+}
+
+void
 kqswnal_unmap_tx (kqswnal_tx_t *ktx)
 {
         if (ktx->ktx_nmappedpages == 0)
@@ -421,8 +433,6 @@ static void
 kqswnal_txhandler(EP_TXD *txd, void *arg, int status)
 {
         kqswnal_tx_t      *ktx = (kqswnal_tx_t *)arg;
-        struct timeval     now;
-        time_t             then;
         
         LASSERT (txd != NULL);
         LASSERT (ktx != NULL);
@@ -437,12 +447,7 @@ kqswnal_txhandler(EP_TXD *txd, void *arg, int status)
                 CERROR ("Tx completion to "LPX64" failed: %d\n", 
                         ktx->ktx_nid, status);
 
-                do_gettimeofday (&now);
-                then = now.tv_sec - (jiffies - ktx->ktx_launchtime)/HZ;
-        
-                kpr_notify (&kqswnal_data.kqn_router, 
-                            ktx->ktx_nid, 0, then);
-
+                kqswnal_notify_peer_down(ktx);
                 status = -EIO;
         }
 
@@ -483,10 +488,7 @@ kqswnal_launch (kqswnal_tx_t *ktx)
 
         default: /* fatal error */
                 CERROR ("Tx to "LPX64" failed: %d\n", ktx->ktx_nid, rc);
-
-                /* Tell router I think a node is down */
-                kpr_notify (&kqswnal_data.kqn_router, ktx->ktx_nid,
-                            0, ktx->ktx_launchtime);
+                kqswnal_notify_peer_down(ktx);
                 return (rc);
         }
 }
index b0b9342..65db867 100644 (file)
@@ -1526,9 +1526,9 @@ ksocknal_process_receive (ksock_sched_t *sched, unsigned long *irq_flags)
                                         conn, rc, conn->ksnc_peer->ksnp_nid,
                                         conn->ksnc_ipaddr, conn->ksnc_port);
                         else
-                                CERROR ("[%p] EOF from "LPX64" ip %08x:%d\n",
-                                        conn, conn->ksnc_peer->ksnp_nid,
-                                        conn->ksnc_ipaddr, conn->ksnc_port);
+                                CWARN ("[%p] EOF from "LPX64" ip %08x:%d\n",
+                                       conn, conn->ksnc_peer->ksnp_nid,
+                                       conn->ksnc_ipaddr, conn->ksnc_port);
                 }
                 goto out;
         }
index a03fb42..32f741f 100644 (file)
@@ -119,6 +119,8 @@ kpr_do_upcall (void *arg)
         snprintf (whenstr, sizeof(whenstr), "%ld", u->kpru_when);
 
         portals_run_upcall (argv);
+
+        kfree (u);
 }
 
 void
@@ -161,11 +163,12 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
         /* can't do predictions... */
         do_gettimeofday (&now);
         if (when > now.tv_sec) {
-                CERROR ("Ignoring prediction from %s of [%d] "LPX64" %s "
-                        "%ld seconds in the future\n", 
-                byNal ? "NAL" : "userspace", 
-                gateway_nalid, gateway_nid, alive ? "up" : "down",
-                        when - now.tv_sec);
+                CWARN ("Ignoring prediction from %s of [%d] "LPX64" %s "
+                       "%ld seconds in the future\n", 
+                       byNal ? "NAL" : "userspace", 
+                       gateway_nalid, gateway_nid, 
+                       alive ? "up" : "down",
+                       when - now.tv_sec);
                 return (EINVAL);
         }
 
@@ -189,14 +192,14 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
         if (rc != 0) {
                 /* gateway not found */
                 write_unlock_irqrestore(&kpr_rwlock, flags);
-                CERROR ("Gateway not found\n");
+                CDEBUG (D_NET, "Gateway not found\n");
                 return (rc);
         }
         
         if (when < ge->kpge_timestamp) {
                 /* out of date information */
                 write_unlock_irqrestore (&kpr_rwlock, flags);
-                CERROR ("Out of date\n");
+                CDEBUG (D_NET, "Out of date\n");
                 return (0);
         }
 
@@ -206,7 +209,7 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
         if ((!ge->kpge_alive) == (!alive)) {
                 /* new date for old news */
                 write_unlock_irqrestore (&kpr_rwlock, flags);
-                CERROR ("Old news\n");
+                CDEBUG (D_NET, "Old news\n");
                 return (0);
         }
 
@@ -250,10 +253,12 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
         
         if (byNal) {
                 /* It wasn't userland that notified me... */
-                CERROR ("Doing upcall\n");
+                CWARN ("Upcall: NAL %d NID "LPX64" is %s\n",
+                       gateway_nalid, gateway_nid,
+                       alive ? "alive" : "dead");
                 kpr_upcall (gateway_nalid, gateway_nid, alive, when);
         } else {
-                CERROR (" NOT Doing upcall\n");
+                CDEBUG (D_NET, " NOT Doing upcall\n");
         }
         
         return (0);
index 99f299f..7f8bc96 100644 (file)
@@ -119,6 +119,18 @@ kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
 }
 
 void
+kqswnal_notify_peer_down(kqswnal_tx_t *ktx)
+{
+        struct timeval     now;
+        time_t             then;
+
+        do_gettimeofday (&now);
+        then = now.tv_sec - (jiffies - ktx->ktx_launchtime)/HZ;
+
+        kpr_notify(&kqswnal_data.kqn_router, ktx->ktx_nid, 0, then);
+}
+
+void
 kqswnal_unmap_tx (kqswnal_tx_t *ktx)
 {
         if (ktx->ktx_nmappedpages == 0)
@@ -421,8 +433,6 @@ static void
 kqswnal_txhandler(EP_TXD *txd, void *arg, int status)
 {
         kqswnal_tx_t      *ktx = (kqswnal_tx_t *)arg;
-        struct timeval     now;
-        time_t             then;
         
         LASSERT (txd != NULL);
         LASSERT (ktx != NULL);
@@ -437,12 +447,7 @@ kqswnal_txhandler(EP_TXD *txd, void *arg, int status)
                 CERROR ("Tx completion to "LPX64" failed: %d\n", 
                         ktx->ktx_nid, status);
 
-                do_gettimeofday (&now);
-                then = now.tv_sec - (jiffies - ktx->ktx_launchtime)/HZ;
-        
-                kpr_notify (&kqswnal_data.kqn_router, 
-                            ktx->ktx_nid, 0, then);
-
+                kqswnal_notify_peer_down(ktx);
                 status = -EIO;
         }
 
@@ -483,10 +488,7 @@ kqswnal_launch (kqswnal_tx_t *ktx)
 
         default: /* fatal error */
                 CERROR ("Tx to "LPX64" failed: %d\n", ktx->ktx_nid, rc);
-
-                /* Tell router I think a node is down */
-                kpr_notify (&kqswnal_data.kqn_router, ktx->ktx_nid,
-                            0, ktx->ktx_launchtime);
+                kqswnal_notify_peer_down(ktx);
                 return (rc);
         }
 }
index b0b9342..65db867 100644 (file)
@@ -1526,9 +1526,9 @@ ksocknal_process_receive (ksock_sched_t *sched, unsigned long *irq_flags)
                                         conn, rc, conn->ksnc_peer->ksnp_nid,
                                         conn->ksnc_ipaddr, conn->ksnc_port);
                         else
-                                CERROR ("[%p] EOF from "LPX64" ip %08x:%d\n",
-                                        conn, conn->ksnc_peer->ksnp_nid,
-                                        conn->ksnc_ipaddr, conn->ksnc_port);
+                                CWARN ("[%p] EOF from "LPX64" ip %08x:%d\n",
+                                       conn, conn->ksnc_peer->ksnp_nid,
+                                       conn->ksnc_ipaddr, conn->ksnc_port);
                 }
                 goto out;
         }
index a03fb42..32f741f 100644 (file)
@@ -119,6 +119,8 @@ kpr_do_upcall (void *arg)
         snprintf (whenstr, sizeof(whenstr), "%ld", u->kpru_when);
 
         portals_run_upcall (argv);
+
+        kfree (u);
 }
 
 void
@@ -161,11 +163,12 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
         /* can't do predictions... */
         do_gettimeofday (&now);
         if (when > now.tv_sec) {
-                CERROR ("Ignoring prediction from %s of [%d] "LPX64" %s "
-                        "%ld seconds in the future\n", 
-                byNal ? "NAL" : "userspace", 
-                gateway_nalid, gateway_nid, alive ? "up" : "down",
-                        when - now.tv_sec);
+                CWARN ("Ignoring prediction from %s of [%d] "LPX64" %s "
+                       "%ld seconds in the future\n", 
+                       byNal ? "NAL" : "userspace", 
+                       gateway_nalid, gateway_nid, 
+                       alive ? "up" : "down",
+                       when - now.tv_sec);
                 return (EINVAL);
         }
 
@@ -189,14 +192,14 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
         if (rc != 0) {
                 /* gateway not found */
                 write_unlock_irqrestore(&kpr_rwlock, flags);
-                CERROR ("Gateway not found\n");
+                CDEBUG (D_NET, "Gateway not found\n");
                 return (rc);
         }
         
         if (when < ge->kpge_timestamp) {
                 /* out of date information */
                 write_unlock_irqrestore (&kpr_rwlock, flags);
-                CERROR ("Out of date\n");
+                CDEBUG (D_NET, "Out of date\n");
                 return (0);
         }
 
@@ -206,7 +209,7 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
         if ((!ge->kpge_alive) == (!alive)) {
                 /* new date for old news */
                 write_unlock_irqrestore (&kpr_rwlock, flags);
-                CERROR ("Old news\n");
+                CDEBUG (D_NET, "Old news\n");
                 return (0);
         }
 
@@ -250,10 +253,12 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
         
         if (byNal) {
                 /* It wasn't userland that notified me... */
-                CERROR ("Doing upcall\n");
+                CWARN ("Upcall: NAL %d NID "LPX64" is %s\n",
+                       gateway_nalid, gateway_nid,
+                       alive ? "alive" : "dead");
                 kpr_upcall (gateway_nalid, gateway_nid, alive, when);
         } else {
-                CERROR (" NOT Doing upcall\n");
+                CDEBUG (D_NET, " NOT Doing upcall\n");
         }
         
         return (0);