Whamcloud - gitweb
* removed a diff that crept in somehow
[fs/lustre-release.git] / lnet / router / router.c
index f082415..7edc5f6 100644 (file)
@@ -27,6 +27,7 @@ LIST_HEAD(kpr_routes);
 LIST_HEAD(kpr_gateways);
 LIST_HEAD(kpr_nals);
 
+unsigned int       kpr_routes_generation;
 unsigned long long kpr_fwd_bytes;
 unsigned long      kpr_fwd_packets;
 unsigned long      kpr_fwd_errors;
@@ -48,13 +49,6 @@ kpr_router_interface_t kpr_router_interface = {
        kprri_deregister:       kpr_deregister_nal,
 };
 
-kpr_control_interface_t kpr_control_interface = {
-       kprci_add_route:        kpr_add_route,
-       kprci_del_route:        kpr_del_route,
-       kprci_get_route:        kpr_get_route,
-       kprci_notify:           kpr_sys_notify,
-};
-
 int
 kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
 {
@@ -62,7 +56,7 @@ kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
        struct list_head  *e;
        kpr_nal_entry_t   *ne;
 
-        CDEBUG (D_NET, "Registering NAL %d\n", nalif->kprni_nalid);
+        CDEBUG (D_NET, "Registering NAL %x\n", nalif->kprni_nalid);
 
        PORTAL_ALLOC (ne, sizeof (*ne));
        if (ne == NULL)
@@ -82,7 +76,7 @@ kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
                {
                        write_unlock_irqrestore (&kpr_rwlock, flags);
 
-                       CERROR ("Attempt to register same NAL %d twice\n", ne->kpne_interface.kprni_nalid);
+                       CERROR ("Attempt to register same NAL %x twice\n", ne->kpne_interface.kprni_nalid);
 
                        PORTAL_FREE (ne, sizeof (*ne));
                        return (-EEXIST);
@@ -126,12 +120,16 @@ kpr_do_upcall (void *arg)
 void
 kpr_upcall (int gw_nalid, ptl_nid_t gw_nid, int alive, time_t when)
 {
+        char str[PTL_NALFMT_SIZE];
+        
         /* May be in arbitrary context */
         kpr_upcall_t  *u = kmalloc (sizeof (kpr_upcall_t), GFP_ATOMIC);
 
         if (u == NULL) {
-                CERROR ("Upcall out of memory: nal %d nid "LPX64" %s\n",
-                        gw_nalid, gw_nid, alive ? "up" : "down");
+                CERROR ("Upcall out of memory: nal %x nid "LPX64" (%s) %s\n",
+                        gw_nalid, gw_nid,
+                        portals_nid2str(gw_nalid, gw_nid, str),
+                        alive ? "up" : "down");
                 return;
         }
 
@@ -155,15 +153,16 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
         struct timeval       now;
        struct list_head    *e;
        struct list_head    *n;
+        char                 str[PTL_NALFMT_SIZE];
 
-        CDEBUG (D_NET, "%s notifying [%d] "LPX64": %s\n", 
+        CDEBUG (D_NET, "%s notifying [%x] "LPX64": %s\n", 
                 byNal ? "NAL" : "userspace", 
                 gateway_nalid, gateway_nid, alive ? "up" : "down");
 
         /* can't do predictions... */
         do_gettimeofday (&now);
         if (when > now.tv_sec) {
-                CWARN ("Ignoring prediction from %s of [%d] "LPX64" %s "
+                CWARN ("Ignoring prediction from %s of [%x] "LPX64" %s "
                        "%ld seconds in the future\n", 
                        byNal ? "NAL" : "userspace", 
                        gateway_nalid, gateway_nid, 
@@ -253,8 +252,9 @@ kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
         
         if (byNal) {
                 /* It wasn't userland that notified me... */
-                CWARN ("Upcall: NAL %d NID "LPX64" is %s\n",
+                CWARN ("Upcall: NAL %x NID "LPX64" (%s) is %s\n",
                        gateway_nalid, gateway_nid,
+                       portals_nid2str(gateway_nalid, gateway_nid, str),
                        alive ? "alive" : "dead");
                 kpr_upcall (gateway_nalid, gateway_nid, alive, when);
         } else {
@@ -278,23 +278,14 @@ kpr_shutdown_nal (void *arg)
        unsigned long    flags;
        kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
 
-        CDEBUG (D_NET, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
+        CDEBUG (D_NET, "Shutting down NAL %x\n", ne->kpne_interface.kprni_nalid);
 
        LASSERT (!ne->kpne_shutdown);
        LASSERT (!in_interrupt());
 
-       write_lock_irqsave (&kpr_rwlock, flags); /* locking a bit spurious... */
+       write_lock_irqsave (&kpr_rwlock, flags);
        ne->kpne_shutdown = 1;
-       write_unlock_irqrestore (&kpr_rwlock, flags); /* except it's a memory barrier */
-
-       while (atomic_read (&ne->kpne_refcount) != 0)
-       {
-               CDEBUG (D_NET, "Waiting for refcount on NAL %d to reach zero (%d)\n",
-                       ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount));
-
-               set_current_state (TASK_UNINTERRUPTIBLE);
-               schedule_timeout (HZ);
-       }
+       write_unlock_irqrestore (&kpr_rwlock, flags);
 }
 
 void
@@ -303,18 +294,25 @@ kpr_deregister_nal (void *arg)
        unsigned long     flags;
        kpr_nal_entry_t  *ne = (kpr_nal_entry_t *)arg;
 
-        CDEBUG (D_NET, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
+        CDEBUG (D_NET, "Deregister NAL %x\n", ne->kpne_interface.kprni_nalid);
 
        LASSERT (ne->kpne_shutdown);            /* caller must have issued shutdown already */
-       LASSERT (atomic_read (&ne->kpne_refcount) == 0); /* can't be busy */
        LASSERT (!in_interrupt());
 
        write_lock_irqsave (&kpr_rwlock, flags);
-
        list_del (&ne->kpne_list);
-
        write_unlock_irqrestore (&kpr_rwlock, flags);
 
+        /* Wait until all outstanding messages/notifications have completed */
+       while (atomic_read (&ne->kpne_refcount) != 0)
+       {
+               CDEBUG (D_NET, "Waiting for refcount on NAL %x to reach zero (%d)\n",
+                       ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount));
+
+               set_current_state (TASK_UNINTERRUPTIBLE);
+               schedule_timeout (HZ);
+       }
+
        PORTAL_FREE (ne, sizeof (*ne));
         PORTAL_MODULE_UNUSE;
 }
@@ -369,14 +367,17 @@ kpr_lookup_target (void *arg, ptl_nid_t target_nid, int nob,
         /* Caller wants to know if 'target_nid' can be reached via a gateway
          * ON HER OWN NETWORK */
 
-        CDEBUG (D_NET, "lookup "LPX64" from NAL %d\n", target_nid, 
+        CDEBUG (D_NET, "lookup "LPX64" from NAL %x\n", target_nid, 
                 ne->kpne_interface.kprni_nalid);
-
-       if (ne->kpne_shutdown)          /* caller is shutting down */
-               return (-ENOENT);
+        LASSERT (!in_interrupt());
 
        read_lock (&kpr_rwlock);
 
+       if (ne->kpne_shutdown) {        /* caller is shutting down */
+                read_unlock (&kpr_rwlock);
+               return (-ENOENT);
+        }
+
        /* Search routes for one that has a gateway to target_nid on the callers network */
 
         list_for_each (e, &kpr_routes) {
@@ -410,7 +411,7 @@ kpr_lookup_target (void *arg, ptl_nid_t target_nid, int nob,
 
         /* NB can't deref 're' now; it might have been removed! */
 
-        CDEBUG (D_NET, "lookup "LPX64" from NAL %d: %d ("LPX64")\n",
+        CDEBUG (D_NET, "lookup "LPX64" from NAL %x: %d ("LPX64")\n",
                 target_nid, ne->kpne_interface.kprni_nalid, rc,
                 (rc == 0) ? *gateway_nidp : (ptl_nid_t)0);
        return (rc);
@@ -446,26 +447,26 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd)
        struct list_head    *e;
         kpr_route_entry_t   *re;
         kpr_nal_entry_t     *tmp_ne;
+        int                  rc;
 
-        CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d\n", fwd,
+        CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %x\n", fwd,
                 target_nid, src_ne->kpne_interface.kprni_nalid);
 
-        LASSERT (nob >= sizeof (ptl_hdr_t)); /* at least got a packet header */
-        LASSERT (nob == lib_iov_nob (fwd->kprfd_niov, fwd->kprfd_iov));
-        
-        atomic_inc (&kpr_queue_depth);
-       atomic_inc (&src_ne->kpne_refcount); /* source nal is busy until fwd completes */
+        LASSERT (nob == lib_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov));
+        LASSERT (!in_interrupt());
+
+       read_lock (&kpr_rwlock);
 
         kpr_fwd_packets++;                   /* (loose) stats accounting */
-        kpr_fwd_bytes += nob;
+        kpr_fwd_bytes += nob + sizeof(ptl_hdr_t);
 
-       if (src_ne->kpne_shutdown)           /* caller is shutting down */
+       if (src_ne->kpne_shutdown) {         /* caller is shutting down */
+                rc = -ESHUTDOWN;
                goto out;
+        }
 
        fwd->kprfd_router_arg = src_ne;      /* stash caller's nal entry */
 
-       read_lock (&kpr_rwlock);
-
        /* Search routes for one that has a gateway to target_nid NOT on the caller's network */
 
         list_for_each (e, &kpr_routes) {
@@ -502,12 +503,14 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd)
                 kpr_update_weight (ge, nob);
 
                 fwd->kprfd_gateway_nid = ge->kpge_nid;
-                atomic_inc (&dst_ne->kpne_refcount); /* dest nal is busy until fwd completes */
+                atomic_inc (&src_ne->kpne_refcount); /* source and dest nals are */
+                atomic_inc (&dst_ne->kpne_refcount); /* busy until fwd completes */
+                atomic_inc (&kpr_queue_depth);
 
                 read_unlock (&kpr_rwlock);
 
-                CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d: "
-                        "to "LPX64" on NAL %d\n", 
+                CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %x: "
+                        "to "LPX64" on NAL %x\n", 
                         fwd, target_nid, src_ne->kpne_interface.kprni_nalid,
                         fwd->kprfd_gateway_nid, dst_ne->kpne_interface.kprni_nalid);
 
@@ -515,18 +518,16 @@ kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd)
                 return;
        }
 
-        read_unlock (&kpr_rwlock);
+        rc = -EHOSTUNREACH;
  out:
         kpr_fwd_errors++;
 
-        CDEBUG (D_NET, "Failed to forward [%p] "LPX64" from NAL %d\n", fwd,
-                target_nid, src_ne->kpne_interface.kprni_nalid);
+        CDEBUG (D_NET, "Failed to forward [%p] "LPX64" from NAL %x: %d\n", 
+                fwd, target_nid, src_ne->kpne_interface.kprni_nalid, rc);
 
-       /* Can't find anywhere to forward to */
-       (fwd->kprfd_callback)(fwd->kprfd_callback_arg, -EHOSTUNREACH);
+       (fwd->kprfd_callback)(fwd->kprfd_callback_arg, rc);
 
-        atomic_dec (&kpr_queue_depth);
-       atomic_dec (&src_ne->kpne_refcount);
+        read_unlock (&kpr_rwlock);
 }
 
 void
@@ -535,14 +536,14 @@ kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error)
        kpr_nal_entry_t *dst_ne = (kpr_nal_entry_t *)arg;
        kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)fwd->kprfd_router_arg;
 
-        CDEBUG (D_NET, "complete(1) [%p] from NAL %d to NAL %d: %d\n", fwd,
+        CDEBUG (D_NET, "complete(1) [%p] from NAL %x to NAL %x: %d\n", fwd,
                 src_ne->kpne_interface.kprni_nalid, dst_ne->kpne_interface.kprni_nalid, error);
 
        atomic_dec (&dst_ne->kpne_refcount);    /* CAVEAT EMPTOR dst_ne can disappear now!!! */
 
        (fwd->kprfd_callback)(fwd->kprfd_callback_arg, error);
 
-        CDEBUG (D_NET, "complete(2) [%p] from NAL %d: %d\n", fwd,
+        CDEBUG (D_NET, "complete(2) [%p] from NAL %x: %d\n", fwd,
                 src_ne->kpne_interface.kprni_nalid, error);
 
         atomic_dec (&kpr_queue_depth);
@@ -559,7 +560,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid,
         kpr_gateway_entry_t *ge;
         int                  dup = 0;
 
-        CDEBUG(D_NET, "Add route: %d "LPX64" : "LPX64" - "LPX64"\n",
+        CDEBUG(D_NET, "Add route: %x "LPX64" : "LPX64" - "LPX64"\n",
                gateway_nalid, gateway_nid, lo_nid, hi_nid);
 
         if (gateway_nalid == PTL_NID_ANY ||
@@ -580,8 +581,10 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid,
         atomic_set (&ge->kpge_weight, 0);
 
         PORTAL_ALLOC (re, sizeof (*re));
-        if (re == NULL)
+        if (re == NULL) {
+                PORTAL_FREE (ge, sizeof (*ge));
                 return (-ENOMEM);
+        }
 
         re->kpre_lo_nid = lo_nid;
         re->kpre_hi_nid = hi_nid;
@@ -592,7 +595,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid,
         list_for_each (e, &kpr_gateways) {
                 kpr_gateway_entry_t *ge2 = list_entry(e, kpr_gateway_entry_t,
                                                       kpge_list);
-                
+
                 if (ge2->kpge_nalid == gateway_nalid &&
                     ge2->kpge_nid == gateway_nid) {
                         PORTAL_FREE (ge, sizeof (*ge));
@@ -604,7 +607,6 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid,
 
         if (!dup) {
                 /* Adding a new gateway... */
                 list_add (&ge->kpge_list, &kpr_gateways);
 
                 /* ...zero all gateway weights so this one doesn't have to
@@ -615,12 +617,12 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid,
                                                               kpge_list);
                         atomic_set (&ge2->kpge_weight, 0);
                 }
-                
         }
 
         re->kpre_gateway = ge;
         ge->kpge_refcount++;
         list_add (&re->kpre_list, &kpr_routes);
+        kpr_routes_generation++;
 
         write_unlock_irqrestore (&kpr_rwlock, flags);
         return (0);
@@ -628,7 +630,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid,
 
 int
 kpr_sys_notify (int gateway_nalid, ptl_nid_t gateway_nid,
-            int alive, time_t when)
+                int alive, time_t when) /* passes byNal == 0: not reported by a NAL */
 {
         return (kpr_do_notify (0, gateway_nalid, gateway_nid, alive, when));
 }
@@ -638,12 +640,12 @@ kpr_del_route (int gw_nalid, ptl_nid_t gw_nid,
                ptl_nid_t lo, ptl_nid_t hi)
 {
         int                specific = (lo != PTL_NID_ANY);
-       unsigned long      flags;
+        unsigned long      flags;
         int                rc = -ENOENT;
-       struct list_head  *e;
-       struct list_head  *n;
+        struct list_head  *e;
+        struct list_head  *n;
 
-        CDEBUG(D_NET, "Del route [%d] "LPX64" : "LPX64" - "LPX64"\n", 
+        CDEBUG(D_NET, "Del route [%x] "LPX64" : "LPX64" - "LPX64"\n",
                gw_nalid, gw_nid, lo, hi);
 
         LASSERT(!in_interrupt());
@@ -651,20 +653,19 @@ kpr_del_route (int gw_nalid, ptl_nid_t gw_nid,
         /* NB Caller may specify either all routes via the given gateway
          * (lo/hi == PTL_NID_ANY) or a specific route entry (lo/hi are
          * actual NIDs) */
-        
         if (specific ? (hi == PTL_NID_ANY || hi < lo) : (hi != PTL_NID_ANY))
                 return (-EINVAL);
 
-       write_lock_irqsave(&kpr_rwlock, flags);
+        write_lock_irqsave(&kpr_rwlock, flags);
 
         list_for_each_safe (e, n, &kpr_routes) {
                 kpr_route_entry_t   *re = list_entry(e, kpr_route_entry_t,
                                                    kpre_list);
                 kpr_gateway_entry_t *ge = re->kpre_gateway;
-                
+
                 if (ge->kpge_nalid != gw_nalid ||
                     ge->kpge_nid != gw_nid ||
-                    (specific && 
+                    (specific &&
                      (lo != re->kpre_lo_nid || hi != re->kpre_hi_nid)))
                         continue;
 
@@ -682,16 +683,19 @@ kpr_del_route (int gw_nalid, ptl_nid_t gw_nid,
                         break;
         }
 
+        kpr_routes_generation++;
         write_unlock_irqrestore(&kpr_rwlock, flags);
+
         return (rc);
 }
 
 int
-kpr_get_route (int idx, int *gateway_nalid, ptl_nid_t *gateway_nid,
-               ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, int *alive)
+kpr_get_route (int idx, __u32 *gateway_nalid, ptl_nid_t *gateway_nid,
+               ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, __u32 *alive)
 {
        struct list_head  *e;
 
+        LASSERT (!in_interrupt());
        read_lock(&kpr_rwlock);
 
         for (e = kpr_routes.next; e != &kpr_routes; e = e->next) {
@@ -715,11 +719,67 @@ kpr_get_route (int idx, int *gateway_nalid, ptl_nid_t *gateway_nid,
         return (-ENOENT);
 }
 
+/* Command handler registered for ROUTER via libcfs_nal_cmd_register():
+ * dispatches pcfg->pcfg_command; unknown commands return -EINVAL. */
+static int
+kpr_nal_cmd(struct portals_cfg *pcfg, void * private)
+{
+        int err = -EINVAL;
+        ENTRY;
+
+        switch(pcfg->pcfg_command) {
+        default:
+                CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command);
+                break;
+
+        case NAL_CMD_ADD_ROUTE:
+                CDEBUG(D_IOCTL, "Adding route: [%x] "LPU64" : "LPU64" - "LPU64"\n",
+                       pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                       pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                err = kpr_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                    pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                break;
+
+        case NAL_CMD_DEL_ROUTE:
+                CDEBUG (D_IOCTL, "Removing routes via [%x] "LPU64" : "LPU64" - "LPU64"\n",
+                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                err = kpr_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                     pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                break;
+
+        case NAL_CMD_NOTIFY_ROUTER:
+                CDEBUG (D_IOCTL, "Notifying peer [%x] "LPU64" %s @ %ld\n",
+                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                        pcfg->pcfg_flags ? "Enabling" : "Disabling",
+                        (time_t)pcfg->pcfg_nid3); /* nid3 carries the time */
+                err = kpr_sys_notify (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                      pcfg->pcfg_flags, (time_t)pcfg->pcfg_nid3);
+                break;
+
+        case NAL_CMD_GET_ROUTE:
+                /* pcfg_count is passed as the route index; outputs land in pcfg */
+                CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count);
+                err = kpr_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal,
+                                    &pcfg->pcfg_nid, &pcfg->pcfg_nid2,
+                                    &pcfg->pcfg_nid3, &pcfg->pcfg_flags);
+                break;
+        }
+        RETURN(err);
+}
+
+
 static void /*__exit*/
 kpr_finalise (void)
 {
         LASSERT (list_empty (&kpr_nals));
 
+        libcfs_nal_cmd_unregister(ROUTER);
+
+        PORTAL_SYMBOL_UNREGISTER(kpr_router_interface);
+
+        kpr_proc_fini();
+
         while (!list_empty (&kpr_routes)) {
                 kpr_route_entry_t *re = list_entry(kpr_routes.next,
                                                    kpr_route_entry_t,
@@ -729,11 +789,6 @@ kpr_finalise (void)
                 PORTAL_FREE(re, sizeof (*re));
         }
 
-        kpr_proc_fini();
-
-        PORTAL_SYMBOL_UNREGISTER(kpr_router_interface);
-        PORTAL_SYMBOL_UNREGISTER(kpr_control_interface);
-
         CDEBUG(D_MALLOC, "kpr_finalise: kmem back to %d\n",
                atomic_read(&portal_kmemory));
 }
@@ -741,13 +796,21 @@ kpr_finalise (void)
 static int __init
 kpr_initialise (void)
 {
+        int     rc;
+        
         CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
                atomic_read(&portal_kmemory));
 
+        kpr_routes_generation = 0;
         kpr_proc_init();
 
+        rc = libcfs_nal_cmd_register(ROUTER, kpr_nal_cmd, NULL);
+        if (rc != 0) {
+                CERROR("Can't register nal cmd handler\n");
+                return (rc);
+        }
+        
         PORTAL_SYMBOL_REGISTER(kpr_router_interface);
-        PORTAL_SYMBOL_REGISTER(kpr_control_interface);
         return (0);
 }
 
@@ -758,5 +821,4 @@ MODULE_LICENSE("GPL");
 module_init (kpr_initialise);
 module_exit (kpr_finalise);
 
-EXPORT_SYMBOL (kpr_control_interface);
 EXPORT_SYMBOL (kpr_router_interface);