Whamcloud - gitweb
land 1.0.1 fixes on main development branch (head)
[fs/lustre-release.git] / lnet / libcfs / module.c
index e8eb290..55e1935 100644 (file)
@@ -3,19 +3,19 @@
  *
  * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
  *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *   This file is part of Lustre, http://www.lustre.org.
  *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2.1 of the GNU Lesser General
- *   Public License as published by the Free Software Foundation.
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
  *
- *   Portals is distributed in the hope that it will be useful,
+ *   Lustre is distributed in the hope that it will be useful,
  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU Lesser General Public License for more details.
+ *   GNU General Public License for more details.
  *
- *   You should have received a copy of the GNU Lesser General Public
- *   License along with Portals; if not, write to the Free Software
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
@@ -83,6 +83,115 @@ kportal_daemonize (char *str)
 }
 
 void
+kportal_memhog_free (struct portals_device_userstate *pdu)
+{
+        struct page **level0p = &pdu->pdu_memhog_root_page;
+        struct page **level1p;
+        struct page **level2p;
+        int           count1;
+        int           count2;
+        
+        if (*level0p != NULL) {
+
+                level1p = (struct page **)page_address(*level0p);
+                count1 = 0;
+                
+                while (count1 < PAGE_SIZE/sizeof(struct page *) &&
+                       *level1p != NULL) {
+
+                        level2p = (struct page **)page_address(*level1p);
+                        count2 = 0;
+                        
+                        while (count2 < PAGE_SIZE/sizeof(struct page *) &&
+                               *level2p != NULL) {
+                                
+                                __free_page(*level2p);
+                                pdu->pdu_memhog_pages--;
+                                level2p++;
+                                count2++;
+                        }
+                        
+                        __free_page(*level1p);
+                        pdu->pdu_memhog_pages--;
+                        level1p++;
+                        count1++;
+                }
+                
+                __free_page(*level0p);
+                pdu->pdu_memhog_pages--;
+
+                *level0p = NULL;
+        }
+        
+        LASSERT (pdu->pdu_memhog_pages == 0);
+}
+
+int
+kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flags)
+{
+        struct page **level0p;
+        struct page **level1p;
+        struct page **level2p;
+        int           count1;
+        int           count2;
+        
+        LASSERT (pdu->pdu_memhog_pages == 0);
+        LASSERT (pdu->pdu_memhog_root_page == NULL);
+
+        if (npages < 0)
+                return -EINVAL;
+
+        if (npages == 0)
+                return 0;
+
+        level0p = &pdu->pdu_memhog_root_page;
+        *level0p = alloc_page(flags);
+        if (*level0p == NULL)
+                return -ENOMEM;
+        pdu->pdu_memhog_pages++;
+
+        level1p = (struct page **)page_address(*level0p);
+        count1 = 0;
+        memset(level1p, 0, PAGE_SIZE);
+        
+        while (pdu->pdu_memhog_pages < npages &&
+               count1 < PAGE_SIZE/sizeof(struct page *)) {
+
+                if (signal_pending(current))
+                        return (-EINTR);
+                
+                *level1p = alloc_page(flags);
+                if (*level1p == NULL)
+                        return -ENOMEM;
+                pdu->pdu_memhog_pages++;
+
+                level2p = (struct page **)page_address(*level1p);
+                count2 = 0;
+                memset(level2p, 0, PAGE_SIZE);
+                
+                while (pdu->pdu_memhog_pages < npages &&
+                       count2 < PAGE_SIZE/sizeof(struct page *)) {
+                        
+                        if (signal_pending(current))
+                                return (-EINTR);
+
+                        *level2p = alloc_page(flags);
+                        if (*level2p == NULL)
+                                return (-ENOMEM);
+                        pdu->pdu_memhog_pages++;
+                        
+                        level2p++;
+                        count2++;
+                }
+                
+                level1p++;
+                count1++;
+        }
+
+        return 0;
+}
+
+void
 kportal_blockallsigs ()
 {
         unsigned long  flags;
@@ -96,22 +205,39 @@ kportal_blockallsigs ()
 /* called when opening /dev/device */
 static int kportal_psdev_open(struct inode * inode, struct file * file)
 {
+        struct portals_device_userstate *pdu;
         ENTRY;
-
+        
         if (!inode)
                 RETURN(-EINVAL);
+
         PORTAL_MODULE_USE;
+
+        PORTAL_ALLOC(pdu, sizeof(*pdu));
+        if (pdu != NULL) {
+                pdu->pdu_memhog_pages = 0;
+                pdu->pdu_memhog_root_page = NULL;
+        }
+        file->private_data = pdu;
+        
         RETURN(0);
 }
 
 /* called when closing /dev/device */
 static int kportal_psdev_release(struct inode * inode, struct file * file)
 {
+        struct portals_device_userstate *pdu;
         ENTRY;
 
         if (!inode)
                 RETURN(-EINVAL);
 
+        pdu = file->private_data;
+        if (pdu != NULL) {
+                kportal_memhog_free(pdu);
+                PORTAL_FREE(pdu, sizeof(*pdu));
+        }
+        
         PORTAL_MODULE_UNUSE;
         RETURN(0);
 }
@@ -122,8 +248,8 @@ static inline void freedata(void *data, int len)
 }
 
 static int
-kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
-                  ptl_nid_t hi_nid)
+kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid, 
+                  ptl_nid_t lo_nid, ptl_nid_t hi_nid)
 {
         int rc;
         kpr_control_interface_t *ci;
@@ -139,7 +265,8 @@ kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
 }
 
 static int
-kportal_del_route(ptl_nid_t target)
+kportal_del_route(int gw_nalid, ptl_nid_t gw_nid, 
+                  ptl_nid_t lo, ptl_nid_t hi)
 {
         int rc;
         kpr_control_interface_t *ci;
@@ -148,7 +275,27 @@ kportal_del_route(ptl_nid_t target)
         if (ci == NULL)
                 return (-ENODEV);
 
-        rc = ci->kprci_del_route (target);
+        rc = ci->kprci_del_route (gw_nalid, gw_nid, lo, hi);
+
+        PORTAL_SYMBOL_PUT(kpr_control_interface);
+        return (rc);
+}
+
+static int
+kportal_notify_router (int gw_nalid, ptl_nid_t gw_nid,
+                       int alive, time_t when)
+{
+        int rc;
+        kpr_control_interface_t *ci;
+
+        /* No error if router not preset.  Sysadmin is allowed to notify
+         * _everywhere_ when a NID boots or crashes, even if they know
+         * nothing of the peer. */
+        ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface);
+        if (ci == NULL)
+                return (0);
+
+        rc = ci->kprci_notify (gw_nalid, gw_nid, alive, when);
 
         PORTAL_SYMBOL_PUT(kpr_control_interface);
         return (rc);
@@ -156,12 +303,13 @@ kportal_del_route(ptl_nid_t target)
 
 static int
 kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp,
-                  ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp)
+                  ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp, int *alivep)
 {
         int       gateway_nalid;
         ptl_nid_t gateway_nid;
         ptl_nid_t lo_nid;
         ptl_nid_t hi_nid;
+        int       alive;
         int       rc;
         kpr_control_interface_t *ci;
 
@@ -169,34 +317,88 @@ kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp,
         if (ci == NULL)
                 return (-ENODEV);
 
-        rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid, &lo_nid,
-                                 &hi_nid);
+        rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid,
+                                 &lo_nid, &hi_nid, &alive);
 
         if (rc == 0) {
-                CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64"\n",
-                       index, gateway_nalid, gateway_nid, lo_nid, hi_nid);
+                CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64", %s\n",
+                       index, gateway_nalid, gateway_nid, lo_nid, hi_nid,
+                       alive ? "up" : "down");
 
                 *gateway_nalidp = (__u32)gateway_nalid;
-                *gateway_nidp   = (__u32)gateway_nid;
-                *lo_nidp        = (__u32)lo_nid;
-                *hi_nidp        = (__u32)hi_nid;
+                *gateway_nidp   = gateway_nid;
+                *lo_nidp        = lo_nid;
+                *hi_nidp        = hi_nid;
+                *alivep         = alive;
         }
 
         PORTAL_SYMBOL_PUT (kpr_control_interface);
         return (rc);
 }
 
-static int
-kportal_nal_cmd(int nal, struct portal_ioctl_data *data)
+static int 
+kportal_router_cmd(struct portals_cfg *pcfg, void * private)
+{
+        int err = -EINVAL;
+        ENTRY;
+
+        switch(pcfg->pcfg_command) {
+        default:
+                CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command);
+                break;
+                
+        case NAL_CMD_ADD_ROUTE:
+                CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
+                       pcfg->pcfg_nal, pcfg->pcfg_nid, 
+                       pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                err = kportal_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                break;
+
+        case NAL_CMD_DEL_ROUTE:
+                CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n",
+                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid, 
+                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                err = kportal_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                         pcfg->pcfg_nid2, pcfg->pcfg_nid3);
+                break;
+
+        case NAL_CMD_NOTIFY_ROUTER: {
+                CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n",
+                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                        pcfg->pcfg_flags ? "Enabling" : "Disabling",
+                        (time_t)pcfg->pcfg_nid3);
+                
+                err = kportal_notify_router (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
+                                             pcfg->pcfg_flags, 
+                                             (time_t)pcfg->pcfg_nid3);
+                break;
+        }
+                
+        case NAL_CMD_GET_ROUTE:
+                CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count);
+                err = kportal_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal,
+                                        &pcfg->pcfg_nid, 
+                                        &pcfg->pcfg_nid2, &pcfg->pcfg_nid3,
+                                        &pcfg->pcfg_flags);
+                break;
+        }
+        RETURN(err);
+}
+
+int
+kportal_nal_cmd(struct portals_cfg *pcfg)
 {
+        __u32 nal = pcfg->pcfg_nal;
         int rc = -EINVAL;
 
         ENTRY;
 
         down(&nal_cmd_sem);
         if (nal > 0 && nal <= NAL_MAX_NR && nal_cmd[nal].nch_handler) {
-                CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, data->ioc_nal_cmd);
-                rc = nal_cmd[nal].nch_handler(data, nal_cmd[nal].nch_private);
+                CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, 
+                       pcfg->pcfg_command);
+                rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private);
         }
         up(&nal_cmd_sem);
         RETURN(rc);
@@ -216,6 +418,8 @@ kportal_get_ni (int nal)
                 return  (PORTAL_SYMBOL_GET(ktoenal_ni));
         case GMNAL:
                 return  (PORTAL_SYMBOL_GET(kgmnal_ni));
+        case IBNAL:
+                return  (PORTAL_SYMBOL_GET(kibnal_ni));
         case TCPNAL:
                 /* userspace NAL */
                 return (NULL);
@@ -246,6 +450,9 @@ kportal_put_ni (int nal)
         case GMNAL:
                 PORTAL_SYMBOL_PUT(kgmnal_ni);
                 break;
+        case IBNAL:
+                PORTAL_SYMBOL_PUT(kibnal_ni);
+                break;
         case TCPNAL:
                 /* A lesson to a malicious caller */
                 LBUG ();
@@ -300,9 +507,13 @@ static int kportal_ioctl(struct inode *inode, struct file *file,
         int err = 0;
         char buf[1024];
         struct portal_ioctl_data *data;
+        char str[PTL_NALFMT_SIZE];
 
         ENTRY;
 
+        if (current->fsuid != 0)
+                RETURN(err = -EACCES);
+
         if ( _IOC_TYPE(cmd) != IOC_PORTAL_TYPE ||
              _IOC_NR(cmd) < IOC_PORTAL_MIN_NR  ||
              _IOC_NR(cmd) > IOC_PORTAL_MAX_NR ) {
@@ -353,8 +564,9 @@ static int kportal_ioctl(struct inode *inode, struct file *file,
         case IOC_PORTAL_PING: {
                 void (*ping)(struct portal_ioctl_data *);
 
-                CDEBUG(D_IOCTL, "doing %d pings to nid "LPU64"\n",
-                       data->ioc_count, data->ioc_nid);
+                CDEBUG(D_IOCTL, "doing %d pings to nid "LPX64" (%s)\n",
+                       data->ioc_count, data->ioc_nid,
+                       portals_nid2str(data->ioc_nal, data->ioc_nid, str));
                 ping = PORTAL_SYMBOL_GET(kping_client);
                 if (!ping)
                         CERROR("PORTAL_SYMBOL_GET failed\n");
@@ -365,35 +577,11 @@ static int kportal_ioctl(struct inode *inode, struct file *file,
                 RETURN(0);
         }
 
-        case IOC_PORTAL_ADD_ROUTE:
-                CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
-                       data->ioc_nal, data->ioc_nid, data->ioc_nid2,
-                       data->ioc_nid3);
-                err = kportal_add_route(data->ioc_nal, data->ioc_nid,
-                                        MIN (data->ioc_nid2, data->ioc_nid3),
-                                        MAX (data->ioc_nid2, data->ioc_nid3));
-                break;
-
-        case IOC_PORTAL_DEL_ROUTE:
-                CDEBUG (D_IOCTL, "Removing route to "LPU64"\n", data->ioc_nid);
-                err = kportal_del_route (data->ioc_nid);
-                break;
-
-        case IOC_PORTAL_GET_ROUTE:
-                CDEBUG (D_IOCTL, "Getting route [%d]\n", data->ioc_count);
-                err = kportal_get_route(data->ioc_count, &data->ioc_nal,
-                                        &data->ioc_nid, &data->ioc_nid2,
-                                        &data->ioc_nid3);
-                if (err == 0)
-                        if (copy_to_user((char *)arg, data, sizeof (*data)))
-                                err = -EFAULT;
-                break;
-
         case IOC_PORTAL_GET_NID: {
                 const ptl_handle_ni_t *nip;
                 ptl_process_id_t       pid;
 
-                CDEBUG (D_IOCTL, "Getting nid [%d]\n", data->ioc_nal);
+                CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal);
 
                 nip = kportal_get_ni (data->ioc_nal);
                 if (nip == NULL)
@@ -409,15 +597,29 @@ static int kportal_ioctl(struct inode *inode, struct file *file,
                 break;
         }
 
-        case IOC_PORTAL_NAL_CMD:
-                CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", data->ioc_nal,
-                        data->ioc_nal_cmd);
-                err = kportal_nal_cmd(data->ioc_nal, data);
-                if (err == 0)
+        case IOC_PORTAL_NAL_CMD: {
+                struct portals_cfg pcfg;
+
+                LASSERT (data->ioc_plen1 == sizeof(pcfg));
+                err = copy_from_user(&pcfg, (void *)data->ioc_pbuf1, 
+                                     sizeof(pcfg));
+                if ( err ) {
+                        EXIT;
+                        return err;
+                }
+
+                CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal,
+                        pcfg.pcfg_command);
+                err = kportal_nal_cmd(&pcfg);
+                if (err == 0) {
+                        if (copy_to_user((char *)data->ioc_pbuf1, &pcfg, 
+                                         sizeof (pcfg)))
+                                err = -EFAULT;
                         if (copy_to_user((char *)arg, data, sizeof (*data)))
                                 err = -EFAULT;
+                }
                 break;
-
+        }
         case IOC_PORTAL_FAIL_NID: {
                 const ptl_handle_ni_t *nip;
 
@@ -432,6 +634,42 @@ static int kportal_ioctl(struct inode *inode, struct file *file,
                 kportal_put_ni (data->ioc_nal);
                 break;
         }
+#if LWT_SUPPORT
+        case IOC_PORTAL_LWT_CONTROL: 
+                err = lwt_control (data->ioc_flags, data->ioc_misc);
+                break;
+                
+        case IOC_PORTAL_LWT_SNAPSHOT:
+                err = lwt_snapshot (&data->ioc_nid,
+                                    &data->ioc_count, &data->ioc_misc,
+                                    data->ioc_pbuf1, data->ioc_plen1);
+                if (err == 0 &&
+                    copy_to_user((char *)arg, data, sizeof (*data)))
+                        err = -EFAULT;
+                break;
+                
+        case IOC_PORTAL_LWT_LOOKUP_STRING:
+                err = lwt_lookup_string (&data->ioc_count, data->ioc_pbuf1,
+                                         data->ioc_pbuf2, data->ioc_plen2);
+                if (err == 0 &&
+                    copy_to_user((char *)arg, data, sizeof (*data)))
+                        err = -EFAULT;
+                break;
+#endif
+        case IOC_PORTAL_MEMHOG:
+                if (!capable (CAP_SYS_ADMIN))
+                        err = -EPERM;
+                else if (file->private_data == NULL) {
+                        err = -EINVAL;
+                } else {
+                        kportal_memhog_free(file->private_data);
+                        err = kportal_memhog_alloc(file->private_data,
+                                                   data->ioc_count,
+                                                   data->ioc_flags);
+                        if (err != 0)
+                                kportal_memhog_free(file->private_data);
+                }
+                break;
 
         default:
                 err = -EINVAL;
@@ -467,16 +705,23 @@ static int init_kportals_module(void)
 
         rc = portals_debug_init(5 * 1024 * 1024);
         if (rc < 0) {
-                printk(KERN_ERR "portals_debug_init: %d\n", rc);
+                printk(KERN_ERR "LustreError: portals_debug_init: %d\n", rc);
                 return (rc);
         }
 
+#if LWT_SUPPORT
+        rc = lwt_init();
+        if (rc != 0) {
+                CERROR("lwt_init: error %d\n", rc);
+                goto cleanup_debug;
+        }
+#endif
         sema_init(&nal_cmd_sem, 1);
 
         rc = misc_register(&portal_dev);
         if (rc) {
                 CERROR("misc_register: error %d\n", rc);
-                goto cleanup_debug;
+                goto cleanup_lwt;
         }
 
         rc = PtlInit();
@@ -491,14 +736,26 @@ static int init_kportals_module(void)
                 goto cleanup_fini;
         }
 
+        rc = kportal_nal_register(ROUTER, kportal_router_cmd, NULL);
+        if (rc) {
+                CERROR("kportal_nal_registre: ROUTER error %d\n", rc);
+                goto cleanup_proc;
+        }
+
         CDEBUG (D_OTHER, "portals setup OK\n");
         return (0);
 
+ cleanup_proc:
+        remove_proc();
  cleanup_fini:
         PtlFini();
  cleanup_deregister:
         misc_deregister(&portal_dev);
+ cleanup_lwt:
+#if LWT_SUPPORT
+        lwt_fini();
  cleanup_debug:
+#endif
         portals_debug_cleanup();
         return rc;
 }
@@ -507,6 +764,7 @@ static void exit_kportals_module(void)
 {
         int rc;
 
+        kportal_nal_unregister(ROUTER);
         remove_proc();
         PtlFini();
 
@@ -518,13 +776,17 @@ static void exit_kportals_module(void)
         if (rc)
                 CERROR("misc_deregister error %d\n", rc);
 
+#if LWT_SUPPORT
+        lwt_fini();
+#endif
+
         if (atomic_read(&portal_kmemory) != 0)
                 CERROR("Portals memory leaked: %d bytes\n",
                        atomic_read(&portal_kmemory));
 
         rc = portals_debug_cleanup();
         if (rc)
-                printk(KERN_ERR "portals_debug_cleanup: %d\n", rc);
+                printk(KERN_ERR "LustreError: portals_debug_cleanup: %d\n", rc);
 }
 
 EXPORT_SYMBOL(lib_dispatch);
@@ -546,6 +808,7 @@ EXPORT_SYMBOL(portal_subsystem_debug);
 EXPORT_SYMBOL(portal_debug);
 EXPORT_SYMBOL(portal_stack);
 EXPORT_SYMBOL(portal_printk);
+EXPORT_SYMBOL(portal_cerror);
 EXPORT_SYMBOL(PtlEQWait);
 EXPORT_SYMBOL(PtlEQFree);
 EXPORT_SYMBOL(PtlEQGet);
@@ -559,6 +822,7 @@ EXPORT_SYMBOL(lib_copy_kiov2buf);
 EXPORT_SYMBOL(lib_copy_buf2kiov);
 EXPORT_SYMBOL(lib_finalize);
 EXPORT_SYMBOL(lib_parse);
+EXPORT_SYMBOL(lib_fake_reply_msg);
 EXPORT_SYMBOL(lib_init);
 EXPORT_SYMBOL(lib_fini);
 EXPORT_SYMBOL(portal_kmemory);
@@ -570,6 +834,7 @@ EXPORT_SYMBOL(kportal_assertion_failed);
 EXPORT_SYMBOL(dispatch_name);
 EXPORT_SYMBOL(kportal_get_ni);
 EXPORT_SYMBOL(kportal_put_ni);
+EXPORT_SYMBOL(kportal_nal_cmd);
 
 module_init(init_kportals_module);
 module_exit (exit_kportals_module);