Whamcloud - gitweb
* Applied the last patch in Bug 2306, which changes the portals router/NAL
[fs/lustre-release.git] / lustre / portals / router / router.c
index a03fb42..d0dbf0a 100644 (file)
@@ -119,17 +119,23 @@ kpr_do_upcall (void *arg)
         snprintf (whenstr, sizeof(whenstr), "%ld", u->kpru_when);
 
         portals_run_upcall (argv);
+
+        kfree (u);
 }
 
 void
 kpr_upcall (int gw_nalid, ptl_nid_t gw_nid, int alive, time_t when)
 {
+        char str[PTL_NALFMT_SIZE];
+        
         /* May be in arbitrary context */
         kpr_upcall_t  *u = kmalloc (sizeof (kpr_upcall_t), GFP_ATOMIC);
 
         if (u == NULL) {
-                CERROR ("Upcall out of memory: nal %d nid "LPX64" %s\n",
-                        gw_nalid, gw_nid, alive ? "up" : "down");
+                CERROR ("Upcall out of memory: nal %d nid "LPX64" (%s) %s\n",
+                        gw_nalid, gw_nid,
+                        portals_nid2str(gw_nalid, gw_nid, str),
+                        alive ? "up" : "down");
                 return;
         }
 
@@ -147,25 +153,27 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
                int alive, time_t when)
 {
        unsigned long        flags;
-        int                  rc = -ENOENT;
+        int                  found;
         kpr_nal_entry_t     *ne = NULL;
         kpr_gateway_entry_t *ge = NULL;
         struct timeval       now;
        struct list_head    *e;
        struct list_head    *n;
+        char                 str[PTL_NALFMT_SIZE];
 
-        CDEBUG (D_ERROR, "%s notifying [%d] "LPX64": %s\n", 
+        CDEBUG (D_NET, "%s notifying [%d] "LPX64": %s\n", 
                 byNal ? "NAL" : "userspace", 
                 gateway_nalid, gateway_nid, alive ? "up" : "down");
 
         /* can't do predictions... */
         do_gettimeofday (&now);
         if (when > now.tv_sec) {
-                CERROR ("Ignoring prediction from %s of [%d] "LPX64" %s "
-                        "%ld seconds in the future\n", 
-                byNal ? "NAL" : "userspace", 
-                gateway_nalid, gateway_nid, alive ? "up" : "down",
-                        when - now.tv_sec);
+                CWARN ("Ignoring prediction from %s of [%d] "LPX64" %s "
+                       "%ld seconds in the future\n", 
+                       byNal ? "NAL" : "userspace", 
+                       gateway_nalid, gateway_nid, 
+                       alive ? "up" : "down",
+                       when - now.tv_sec);
                 return (EINVAL);
         }
 
@@ -174,6 +182,7 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
         /* Serialise with lookups (i.e. write lock) */
        write_lock_irqsave(&kpr_rwlock, flags);
 
+        found = 0;
         list_for_each_safe (e, n, &kpr_gateways) {
 
                 ge = list_entry(e, kpr_gateway_entry_t, kpge_list);
@@ -182,21 +191,21 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
                     ge->kpge_nid != gateway_nid)
                         continue;
 
-                rc = 0;
+                found = 1;
                 break;
         }
 
-        if (rc != 0) {
+        if (!found) {
                 /* gateway not found */
                 write_unlock_irqrestore(&kpr_rwlock, flags);
-                CERROR ("Gateway not found\n");
-                return (rc);
+                CDEBUG (D_NET, "Gateway not found\n");
+                return (0);
         }
         
         if (when < ge->kpge_timestamp) {
                 /* out of date information */
                 write_unlock_irqrestore (&kpr_rwlock, flags);
-                CERROR ("Out of date\n");
+                CDEBUG (D_NET, "Out of date\n");
                 return (0);
         }
 
@@ -206,7 +215,7 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
         if ((!ge->kpge_alive) == (!alive)) {
                 /* new date for old news */
                 write_unlock_irqrestore (&kpr_rwlock, flags);
-                CERROR ("Old news\n");
+                CDEBUG (D_NET, "Old news\n");
                 return (0);
         }
 
@@ -223,25 +232,24 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
                 }
         }
 
+        found = 0;
         if (!byNal) {
                 /* userland notified me: notify NAL? */
                 ne = kpr_find_nal_entry_locked (ge->kpge_nalid);
                 if (ne != NULL) {
-                        if (ne->kpne_shutdown ||
-                            ne->kpne_interface.kprni_notify == NULL) {
-                                /* no need to notify */
-                                ne = NULL;
-                        } else {
+                        if (!ne->kpne_shutdown &&
+                            ne->kpne_interface.kprni_notify != NULL) {
                                 /* take a ref on this NAL until notifying
                                  * it has completed... */
                                 atomic_inc (&ne->kpne_refcount);
+                                found = 1;
                         }
                 }
         }
 
         write_unlock_irqrestore(&kpr_rwlock, flags);
 
-        if (ne != NULL) {
+        if (found) {
                 ne->kpne_interface.kprni_notify (ne->kpne_interface.kprni_arg,
                                                  gateway_nid, alive);
                 /* 'ne' can disappear now... */
@@ -250,10 +258,13 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
         
         if (byNal) {
                 /* It wasn't userland that notified me... */
-                CERROR ("Doing upcall\n");
+                CWARN ("Upcall: NAL %d NID "LPX64" (%s) is %s\n",
+                       gateway_nalid, gateway_nid,
+                       portals_nid2str(gateway_nalid, gateway_nid, str),
+                       alive ? "alive" : "dead");
                 kpr_upcall (gateway_nalid, gateway_nid, alive, when);
         } else {
-                CERROR (" NOT Doing upcall\n");
+                CDEBUG (D_NET, " NOT Doing upcall\n");
         }
         
         return (0);
@@ -445,14 +456,13 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd)
         CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d\n", fwd,
                 target_nid, src_ne->kpne_interface.kprni_nalid);
 
-        LASSERT (nob >= sizeof (ptl_hdr_t)); /* at least got a packet header */
-        LASSERT (nob == lib_iov_nob (fwd->kprfd_niov, fwd->kprfd_iov));
+        LASSERT (nob == lib_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov));
         
         atomic_inc (&kpr_queue_depth);
        atomic_inc (&src_ne->kpne_refcount); /* source nal is busy until fwd completes */
 
         kpr_fwd_packets++;                   /* (loose) stats accounting */
-        kpr_fwd_bytes += nob;
+        kpr_fwd_bytes += nob + sizeof(ptl_hdr_t);
 
        if (src_ne->kpne_shutdown)           /* caller is shutting down */
                goto out;
@@ -575,8 +585,10 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid,
         atomic_set (&ge->kpge_weight, 0);
 
         PORTAL_ALLOC (re, sizeof (*re));
-        if (re == NULL)
+        if (re == NULL) {
+                PORTAL_FREE (ge, sizeof (*ge));
                 return (-ENOMEM);
+        }
 
         re->kpre_lo_nid = lo_nid;
         re->kpre_hi_nid = hi_nid;