Whamcloud - gitweb
LU-8926 llite: reduce jobstats race window
[fs/lustre-release.git] / lustre / obdclass / class_obd.c
index f3e428e..cf38cde 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
 /*
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2015, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#ifndef __KERNEL__
-# include <liblustre.h>
-#else
-# include <asm/atomic.h>
+
+#include <linux/user_namespace.h>
+#ifdef HAVE_UIDGID_HEADER
+# include <linux/uidgid.h>
 #endif
+#include <linux/atomic.h>
+#include <linux/list.h>
 
 #include <obd_support.h>
 #include <obd_class.h>
+#include <lnet/lnetctl.h>
 #include <lustre_debug.h>
 #include <lprocfs_status.h>
-#include <lustre/lustre_build_version.h>
-#include <libcfs/list.h>
+#include <lustre_ver.h>
+#include <cl_object.h>
+#ifdef HAVE_SERVER_SUPPORT
+# include <dt_object.h>
+# include <md_object.h>
+#endif /* HAVE_SERVER_SUPPORT */
+#include <lustre_ioctl.h>
 #include "llog_internal.h"
 
-#ifndef __KERNEL__
-/* liblustre workaround */
-atomic_t libcfs_kmemory = {0};
-#endif
-
 struct obd_device *obd_devs[MAX_OBD_DEVICES];
 struct list_head obd_types;
-spinlock_t obd_dev_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_RWLOCK(obd_dev_lock);
 
-#ifndef __KERNEL__
-__u64 obd_max_pages = 0;
-__u64 obd_max_alloc = 0;
-__u64 obd_alloc;
-__u64 obd_pages;
+#ifdef CONFIG_PROC_FS
+static __u64 obd_max_alloc;
+#else
+__u64 obd_max_alloc;
 #endif
 
+static DEFINE_SPINLOCK(obd_updatemax_lock);
+
 /* The following are visible and mutable through /proc/sys/lustre/. */
 unsigned int obd_debug_peer_on_timeout;
+EXPORT_SYMBOL(obd_debug_peer_on_timeout);
 unsigned int obd_dump_on_timeout;
+EXPORT_SYMBOL(obd_dump_on_timeout);
 unsigned int obd_dump_on_eviction;
+EXPORT_SYMBOL(obd_dump_on_eviction);
+unsigned long obd_max_dirty_pages;
+EXPORT_SYMBOL(obd_max_dirty_pages);
+atomic_long_t obd_dirty_pages;
+EXPORT_SYMBOL(obd_dirty_pages);
 unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT;   /* seconds */
+EXPORT_SYMBOL(obd_timeout);
 unsigned int ldlm_timeout = LDLM_TIMEOUT_DEFAULT; /* seconds */
-unsigned int obd_max_dirty_pages = 256;
-atomic_t obd_dirty_pages;
-
-cfs_waitq_t obd_race_waitq;
-int obd_race_state;
-
-#ifdef __KERNEL__
-unsigned long obd_print_fail_loc(void)
-{
-        CWARN("obd_fail_loc = %lx\n", obd_fail_loc);
-        return obd_fail_loc;
-}
-
-void obd_set_fail_loc(unsigned int fl)
-{
-        obd_fail_loc = fl;
-}
-
-/*  opening /dev/obd */
-static int obd_class_open(unsigned long flags, void *args)
-{
-        ENTRY;
+EXPORT_SYMBOL(ldlm_timeout);
+unsigned int obd_timeout_set;
+EXPORT_SYMBOL(obd_timeout_set);
+unsigned int ldlm_timeout_set;
+EXPORT_SYMBOL(ldlm_timeout_set);
+/* bulk transfer timeout, give up after 100s by default */
+unsigned int bulk_timeout = 100; /* seconds */
+EXPORT_SYMBOL(bulk_timeout);
+/* Adaptive timeout tunables are defined here rather than in the ptlrpc
+ * module so that they can be accessed through /proc/sys/. */
+unsigned int at_min = 0;
+EXPORT_SYMBOL(at_min);
+unsigned int at_max = 600;
+EXPORT_SYMBOL(at_max);
+unsigned int at_history = 600;
+EXPORT_SYMBOL(at_history);
+int at_early_margin = 5;
+EXPORT_SYMBOL(at_early_margin);
+int at_extra = 30;
+EXPORT_SYMBOL(at_extra);
+
+atomic_long_t obd_dirty_transit_pages;
+EXPORT_SYMBOL(obd_dirty_transit_pages);
+
+char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
+
+#ifdef CONFIG_PROC_FS
+struct lprocfs_stats *obd_memory = NULL;
+EXPORT_SYMBOL(obd_memory);
+#endif
 
-        PORTAL_MODULE_USE;
-        RETURN(0);
-}
+char obd_jobid_node[LUSTRE_JOBID_SIZE + 1];
 
-/*  closing /dev/obd */
-static int obd_class_release(unsigned long flags, void *args)
+/* Get the jobid of the current process and store it in \a jobid.
+ *
+ * The source of the jobid is selected by obd_jobid_var:
+ *  - JOBSTATS_DISABLE:      jobstats are off, the jobid is empty
+ *  - JOBSTATS_NODELOCAL:    the per-node value in obd_jobid_node is used
+ *  - JOBSTATS_PROCNAME_UID: "<process name>.<fsuid>" is used
+ *  - anything else:         obd_jobid_var names an environment variable of
+ *                           the current process, read via cfs_get_environ()
+ *
+ * \a jobid is both input and output: it must hold a NUL-terminated string
+ * on entry (it is compared against the new value) and must provide at
+ * least LUSTRE_JOBID_SIZE bytes.  It is only written when the value
+ * actually changed, and it is left untouched on error.
+ *
+ * Returns 0 on success, or the negative errno from cfs_get_environ().
+ *
+ * TODO:
+ * It would be better to cache the jobid for later use if there is an
+ * efficient way to do so; the cl_env code could probably be reused for
+ * this purpose.
+ *
+ * If some job scheduler doesn't store the jobid in "env_start/end", an
+ * upcall could be issued here to get the jobid by utilizing the userspace
+ * tools/api. In that case the jobid must be cached.
+ */
+int lustre_get_jobid(char *jobid)
 {
-        ENTRY;
+       int jobid_len = LUSTRE_JOBID_SIZE;
+       /* One spare, always-zero byte keeps tmp_jobid NUL-terminated for
+        * the strcmp() below even if a source fills all LUSTRE_JOBID_SIZE
+        * bytes (e.g. the memcpy() from obd_jobid_node). */
+       char tmp_jobid[LUSTRE_JOBID_SIZE + 1] = { 0 };
+       int rc = 0;
+       ENTRY;
+
+       /* Jobstats isn't enabled */
+       if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0)
+               GOTO(out, rc = 0);
+
+       /* Whole node dedicated to single job */
+       if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) {
+               memcpy(tmp_jobid, obd_jobid_node, LUSTRE_JOBID_SIZE);
+               GOTO(out, rc = 0);
+       }
+
+       /* Use process name + fsuid as jobid */
+       if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
+               snprintf(tmp_jobid, LUSTRE_JOBID_SIZE, "%s.%u",
+                        current_comm(),
+                        from_kuid(&init_user_ns, current_fsuid()));
+               GOTO(out, rc = 0);
+       }
+
+       rc = cfs_get_environ(obd_jobid_var, tmp_jobid, &jobid_len);
+       if (rc) {
+               if (rc == -EOVERFLOW) {
+                       /* For the PBS_JOBID and LOADL_STEP_ID keys (which are
+                        * variable length strings instead of just numbers), it
+                        * might make sense to keep the unique parts for JobID,
+                        * instead of just returning an error.  That means a
+                        * larger temp buffer for cfs_get_environ(), then
+                        * truncating the string at some separator to fit into
+                        * the specified jobid_len.  Fix later if needed. */
+                       static bool printed;
+                       if (unlikely(!printed)) {
+                               LCONSOLE_ERROR_MSG(0x16b, "%s value too large "
+                                                  "for JobID buffer (%d)\n",
+                                                  obd_jobid_var, jobid_len);
+                               printed = true;
+                       }
+               } else {
+                       CDEBUG((rc == -ENOENT || rc == -EINVAL ||
+                               rc == -EDEADLK) ? D_INFO : D_ERROR,
+                              "Get jobid for (%s) failed: rc = %d\n",
+                              obd_jobid_var, rc);
+               }
+       }
 
-        PORTAL_MODULE_UNUSE;
-        RETURN(0);
-}
-#endif
+out:
+       if (rc != 0)
+               RETURN(rc);
 
-static inline void obd_data2conn(struct lustre_handle *conn,
-                                 struct obd_ioctl_data *data)
-{
-        memset(conn, 0, sizeof *conn);
-        conn->cookie = data->ioc_cookie;
-}
+       /* Only replace the job ID if it changed.  Copy the whole zero-padded
+        * buffer rather than jobid_len bytes: jobid_len is handed to
+        * cfs_get_environ() by address and may come back shorter
+        * (NOTE(review): presumably the value length - confirm), in which
+        * case a partial copy could leave the tail of a longer, older job
+        * ID behind. */
+       if (strcmp(jobid, tmp_jobid) != 0)
+               memcpy(jobid, tmp_jobid, LUSTRE_JOBID_SIZE);
 
-static inline void obd_conn2data(struct obd_ioctl_data *data,
-                                 struct lustre_handle *conn)
-{
-        data->ioc_cookie = conn->cookie;
+       RETURN(0);
 }
+EXPORT_SYMBOL(lustre_get_jobid);
 
-int class_resolve_dev_name(__u32 len, const char *name)
+static int class_resolve_dev_name(__u32 len, const char *name)
 {
         int rc;
         int dev;
@@ -171,9 +232,9 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
         }
 
         CDEBUG(D_IOCTL, "cmd = %x\n", cmd);
-        if (obd_ioctl_getdata(&buf, &len, (void *)arg)) {
+       if (obd_ioctl_getdata(&buf, &len, (void __user *)arg)) {
                 CERROR("OBD ioctl: data error\n");
-                GOTO(out, err = -EINVAL);
+                RETURN(-EINVAL);
         }
         data = (struct obd_ioctl_data *)buf;
 
@@ -188,7 +249,8 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                 OBD_ALLOC(lcfg, data->ioc_plen1);
                 if (lcfg == NULL)
                         GOTO(out, err = -ENOMEM);
-                err = copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1);
+               err = copy_from_user(lcfg, data->ioc_pbuf1,
+                                         data->ioc_plen1);
                 if (!err)
                         err = lustre_cfg_sanity_check(lcfg, data->ioc_plen1);
                 if (!err)
@@ -198,24 +260,24 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                 GOTO(out, err);
         }
 
-        case OBD_GET_VERSION:
-                if (!data->ioc_inlbuf1) {
-                        CERROR("No buffer passed in ioctl\n");
-                        GOTO(out, err = -EINVAL);
-                }
+       case OBD_GET_VERSION:
+               if (!data->ioc_inlbuf1) {
+                       CERROR("No buffer passed in ioctl\n");
+                       GOTO(out, err = -EINVAL);
+               }
 
-                if (strlen(BUILD_VERSION) + 1 > data->ioc_inllen1) {
-                        CERROR("ioctl buffer too small to hold version\n");
-                        GOTO(out, err = -EINVAL);
-                }
+               if (strlen(LUSTRE_VERSION_STRING) + 1 > data->ioc_inllen1) {
+                       CERROR("ioctl buffer too small to hold version\n");
+                       GOTO(out, err = -EINVAL);
+               }
 
-                memcpy(data->ioc_bulk, BUILD_VERSION,
-                       strlen(BUILD_VERSION) + 1);
+               memcpy(data->ioc_bulk, LUSTRE_VERSION_STRING,
+                      strlen(LUSTRE_VERSION_STRING) + 1);
 
-                err = obd_ioctl_popdata((void *)arg, data, len);
-                if (err)
-                        err = -EFAULT;
-                GOTO(out, err);
+               err = obd_ioctl_popdata((void __user *)arg, data, len);
+               if (err)
+                       err = -EFAULT;
+               GOTO(out, err);
 
         case OBD_IOC_NAME2DEV: {
                 /* Resolve a device name.  This does not change the
@@ -229,7 +291,8 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                 if (dev < 0)
                         GOTO(out, err = -EINVAL);
 
-                err = obd_ioctl_popdata((void *)arg, data, sizeof(*data));
+               err = obd_ioctl_popdata((void __user *)arg, data,
+                                       sizeof(*data));
                 if (err)
                         err = -EFAULT;
                 GOTO(out, err);
@@ -263,18 +326,13 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
 
                 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
                        dev);
-                err = obd_ioctl_popdata((void *)arg, data, sizeof(*data));
+               err = obd_ioctl_popdata((void __user *)arg, data,
+                                       sizeof(*data));
                 if (err)
                         err = -EFAULT;
                 GOTO(out, err);
         }
 
-        case OBD_IOC_CLOSE_UUID: {
-                CDEBUG(D_IOCTL, "closing all connections to uuid %s (NOOP)\n",
-                       data->ioc_inlbuf1);
-                GOTO(out, err = 0);
-        }
-
         case OBD_IOC_GETDEVICE: {
                 int     index = data->ioc_count;
                 char    *status, *str;
@@ -304,20 +362,27 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                 snprintf(str, len - sizeof(*data), "%3d %s %s %s %s %d",
                          (int)index, status, obd->obd_type->typ_name,
                          obd->obd_name, obd->obd_uuid.uuid,
-                         atomic_read(&obd->obd_refcount));
-                err = obd_ioctl_popdata((void *)arg, data, len);
+                        atomic_read(&obd->obd_refcount));
+               err = obd_ioctl_popdata((void __user *)arg, data, len);
 
                 GOTO(out, err = 0);
         }
 
         }
 
-        if (data->ioc_dev >= class_devno_max()) {
+        if (data->ioc_dev == OBD_DEV_BY_DEVNAME) {
+                if (data->ioc_inllen4 <= 0 || data->ioc_inlbuf4 == NULL)
+                        GOTO(out, err = -EINVAL);
+                if (strnlen(data->ioc_inlbuf4, MAX_OBD_NAME) >= MAX_OBD_NAME)
+                        GOTO(out, err = -EINVAL);
+                obd = class_name2obd(data->ioc_inlbuf4);
+        } else if (data->ioc_dev < class_devno_max()) {
+                obd = class_num2obd(data->ioc_dev);
+        } else {
                 CERROR("OBD ioctl: No device\n");
                 GOTO(out, err = -EINVAL);
         }
 
-        obd = class_num2obd(data->ioc_dev);
         if (obd == NULL) {
                 CERROR("OBD ioctl : No Device %d\n", data->ioc_dev);
                 GOTO(out, err = -EINVAL);
@@ -346,7 +411,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                 if (err)
                         GOTO(out, err);
 
-                err = obd_ioctl_popdata((void *)arg, data, len);
+               err = obd_ioctl_popdata((void __user *)arg, data, len);
                 if (err)
                         err = -EFAULT;
                 GOTO(out, err);
@@ -359,160 +424,68 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
         RETURN(err);
 } /* class_handle_ioctl */
 
-
-
-#define OBD_MINOR 241
-#ifdef __KERNEL__
-/* to control /dev/obd */
-static int obd_class_ioctl (struct cfs_psdev_file *pfile, unsigned long cmd,
-                            void *arg)
-{
-        return class_handle_ioctl(cmd, (unsigned long)arg);
-}
-
-/* declare character device */
-struct cfs_psdev_ops obd_psdev_ops = {
-        /* .p_open    = */ obd_class_open,      /* open */
-        /* .p_close   = */ obd_class_release,   /* release */
-        /* .p_read    = */ NULL,
-        /* .p_write   = */ NULL,
-        /* .p_ioctl   = */ obd_class_ioctl     /* ioctl */
-};
-
-extern cfs_psdev_t obd_psdev;
-#else
-void *obd_psdev = NULL;
-#endif
-
-EXPORT_SYMBOL(obd_devs);
-EXPORT_SYMBOL(obd_print_fail_loc);
-EXPORT_SYMBOL(obd_race_waitq);
-EXPORT_SYMBOL(obd_race_state);
-EXPORT_SYMBOL(obd_debug_peer_on_timeout);
-EXPORT_SYMBOL(obd_dump_on_timeout);
-EXPORT_SYMBOL(obd_dump_on_eviction);
-EXPORT_SYMBOL(obd_timeout);
-EXPORT_SYMBOL(ldlm_timeout);
-EXPORT_SYMBOL(obd_max_dirty_pages);
-EXPORT_SYMBOL(obd_dirty_pages);
-EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
-
-EXPORT_SYMBOL(proc_lustre_root);
-
-EXPORT_SYMBOL(class_register_type);
-EXPORT_SYMBOL(class_unregister_type);
-EXPORT_SYMBOL(class_get_type);
-EXPORT_SYMBOL(class_put_type);
-EXPORT_SYMBOL(class_name2dev);
-EXPORT_SYMBOL(class_name2obd);
-EXPORT_SYMBOL(class_uuid2dev);
-EXPORT_SYMBOL(class_uuid2obd);
-EXPORT_SYMBOL(class_find_client_obd);
-EXPORT_SYMBOL(class_find_client_notype);
-EXPORT_SYMBOL(class_devices_in_group);
-EXPORT_SYMBOL(class_conn2export);
-EXPORT_SYMBOL(class_exp2obd);
-EXPORT_SYMBOL(class_conn2obd);
-EXPORT_SYMBOL(class_exp2cliimp);
-EXPORT_SYMBOL(class_conn2cliimp);
-EXPORT_SYMBOL(class_disconnect);
-EXPORT_SYMBOL(class_num2obd);
-
-/* uuid.c */
-EXPORT_SYMBOL(class_uuid_unparse);
-EXPORT_SYMBOL(lustre_uuid_to_peer);
-
-EXPORT_SYMBOL(class_handle_hash);
-EXPORT_SYMBOL(class_handle_unhash);
-EXPORT_SYMBOL(class_handle_hash_back);
-EXPORT_SYMBOL(class_handle2object);
-EXPORT_SYMBOL(class_handle_free_cb);
-
-/* obd_config.c */
-EXPORT_SYMBOL(class_incref);
-EXPORT_SYMBOL(class_decref);
-EXPORT_SYMBOL(class_get_profile);
-EXPORT_SYMBOL(class_del_profile);
-EXPORT_SYMBOL(class_del_profiles);
-EXPORT_SYMBOL(class_process_config);
-EXPORT_SYMBOL(class_process_proc_param);
-EXPORT_SYMBOL(class_config_parse_llog);
-EXPORT_SYMBOL(class_config_dump_llog);
-EXPORT_SYMBOL(class_attach);
-EXPORT_SYMBOL(class_setup);
-EXPORT_SYMBOL(class_cleanup);
-EXPORT_SYMBOL(class_detach);
-EXPORT_SYMBOL(class_manual_cleanup);
-
-/* mea.c */
-EXPORT_SYMBOL(mea_name2idx);
-EXPORT_SYMBOL(raw_name2idx);
-
 #define OBD_INIT_CHECK
 #ifdef OBD_INIT_CHECK
-int obd_init_checks(void)
+static int obd_init_checks(void)
 {
         __u64 u64val, div64val;
         char buf[64];
         int len, ret = 0;
 
-        CDEBUG(D_INFO, "LPU64=%s, LPD64=%s, LPX64=%s, LPSZ=%s, LPSSZ=%s\n",
-               LPU64, LPD64, LPX64, LPSZ, LPSSZ);
-
-        CDEBUG(D_INFO, "OBD_OBJECT_EOF = "LPX64"\n", (__u64)OBD_OBJECT_EOF);
+       CDEBUG(D_INFO, "OBD_OBJECT_EOF = %#llx\n", (__u64)OBD_OBJECT_EOF);
 
         u64val = OBD_OBJECT_EOF;
-        CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = "LPX64"\n", u64val);
+       CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = %#llx\n", u64val);
         if (u64val != OBD_OBJECT_EOF) {
-                CERROR("__u64 "LPX64"(%d) != 0xffffffffffffffff\n",
+               CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
                        u64val, (int)sizeof(u64val));
                 ret = -EINVAL;
         }
-        len = snprintf(buf, sizeof(buf), LPX64, u64val);
+       len = snprintf(buf, sizeof(buf), "%#llx", u64val);
         if (len != 18) {
-                CWARN("LPX64 wrong length! strlen(%s)=%d != 18\n", buf, len);
+               CWARN("u64 hex wrong length! strlen(%s)=%d != 18\n", buf, len);
                 ret = -EINVAL;
         }
 
         div64val = OBD_OBJECT_EOF;
-        CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = "LPX64"\n", u64val);
+       CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = %#llx\n", u64val);
         if (u64val != OBD_OBJECT_EOF) {
-                CERROR("__u64 "LPX64"(%d) != 0xffffffffffffffff\n",
+               CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
                        u64val, (int)sizeof(u64val));
                 ret = -EOVERFLOW;
         }
         if (u64val >> 8 != OBD_OBJECT_EOF >> 8) {
-                CERROR("__u64 "LPX64"(%d) != 0xffffffffffffffff\n",
+               CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
                        u64val, (int)sizeof(u64val));
                 return -EOVERFLOW;
         }
         if (do_div(div64val, 256) != (u64val & 255)) {
-                CERROR("do_div("LPX64",256) != "LPU64"\n", u64val, u64val &255);
+               CERROR("do_div(%#llx,256) != %llu\n", u64val, u64val & 255);
                 return -EOVERFLOW;
         }
         if (u64val >> 8 != div64val) {
-                CERROR("do_div("LPX64",256) "LPU64" != "LPU64"\n",
+               CERROR("do_div(%#llx,256) %llu != %llu\n",
                        u64val, div64val, u64val >> 8);
                 return -EOVERFLOW;
         }
-        len = snprintf(buf, sizeof(buf), LPX64, u64val);
+       len = snprintf(buf, sizeof(buf), "%#llx", u64val);
         if (len != 18) {
-                CWARN("LPX64 wrong length! strlen(%s)=%d != 18\n", buf, len);
+               CWARN("u64 hex wrong length! strlen(%s)=%d != 18\n", buf, len);
                 ret = -EINVAL;
         }
-        len = snprintf(buf, sizeof(buf), LPU64, u64val);
+       len = snprintf(buf, sizeof(buf), "%llu", u64val);
         if (len != 20) {
-                CWARN("LPU64 wrong length! strlen(%s)=%d != 20\n", buf, len);
+               CWARN("u64 wrong length! strlen(%s)=%d != 20\n", buf, len);
                 ret = -EINVAL;
         }
-        len = snprintf(buf, sizeof(buf), LPD64, u64val);
+       len = snprintf(buf, sizeof(buf), "%lld", u64val);
         if (len != 2) {
-                CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len);
+               CWARN("s64 wrong length! strlen(%s)=%d != 2\n", buf, len);
                 ret = -EINVAL;
         }
-        if ((u64val & ~CFS_PAGE_MASK) >= CFS_PAGE_SIZE) {
-                CWARN("mask failed: u64val "LPU64" >= "LPU64"\n", u64val,
-                      (__u64)CFS_PAGE_SIZE);
+       if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) {
+               CWARN("mask failed: u64val %llu >= %llu\n", u64val,
+                     (__u64)PAGE_SIZE);
                 ret = -EINVAL;
         }
 
@@ -522,116 +495,137 @@ int obd_init_checks(void)
 #define obd_init_checks() do {} while(0)
 #endif
 
-extern spinlock_t obd_types_lock;
-extern int class_procfs_init(void);
-extern int class_procfs_clean(void);
-
-#ifdef __KERNEL__
-static int __init init_obdclass(void)
-#else
-int init_obdclass(void)
+/* Module entry point: bring up the obdclass core subsystems.
+ *
+ * The subsystems are initialized one after another.  If a step fails,
+ * everything that was set up before it is torn down again in reverse
+ * order, using the same teardown calls as obdclass_exit(), so that a
+ * failed module load does not leave the misc device registered or the
+ * caches, handle hash, uuid list and memory stats allocated.
+ *
+ * Returns 0 on success, or the negative errno of the first step that
+ * failed.
+ */
+static int __init obdclass_init(void)
+{
+       int i, err;
+
+       spin_lock_init(&obd_stale_export_lock);
+       INIT_LIST_HEAD(&obd_stale_exports);
+       atomic_set(&obd_stale_export_num, 0);
+
+       LCONSOLE_INFO("Lustre: Build Version: "LUSTRE_VERSION_STRING"\n");
+
+       spin_lock_init(&obd_types_lock);
+       obd_zombie_impexp_init();
+#ifdef CONFIG_PROC_FS
+       obd_memory = lprocfs_alloc_stats(OBD_STATS_NUM,
+                                        LPROCFS_STATS_FLAG_NONE |
+                                        LPROCFS_STATS_FLAG_IRQ_SAFE);
+       if (obd_memory == NULL) {
+               CERROR("kmalloc of 'obd_memory' failed\n");
+               /* Only the zombie handling exists so far.  Plain return:
+                * this function has no ENTRY to pair a RETURN() with. */
+               obd_zombie_impexp_stop();
+               return -ENOMEM;
+       }
+
+       lprocfs_counter_init(obd_memory, OBD_MEMORY_STAT,
+                            LPROCFS_CNTR_AVGMINMAX,
+                            "memused", "bytes");
 #endif
+       /* Only -EOVERFLOW is treated as fatal; any other result of the
+        * sanity checks is ignored here. */
+       err = obd_init_checks();
+       if (err == -EOVERFLOW)
+               goto cleanup_early;
+
+       class_init_uuidlist();
+       err = class_handle_init();
+       if (err)
+               goto cleanup_uuidlist;
+
+       INIT_LIST_HEAD(&obd_types);
+
+       err = misc_register(&obd_psdev);
+       if (err) {
+               CERROR("cannot register %d err %d\n", OBD_DEV_MINOR, err);
+               goto cleanup_class_handle;
+       }
+
+       /* This struct is already zeroed for us (static global) */
+       for (i = 0; i < class_devno_max(); i++)
+               obd_devs[i] = NULL;
+
+       /* Default the dirty page cache cap to 1/2 of system memory.
+        * Nodes with 512MB of RAM or less are capped at 1/4 instead,
+        * because a larger fraction of their memory is needed for other
+        * purposes (mostly for BGL). */
+       if (totalram_pages <= (512 << (20 - PAGE_SHIFT)))
+               obd_max_dirty_pages = totalram_pages / 4;
+       else
+               obd_max_dirty_pages = totalram_pages / 2;
+
+       err = obd_init_caches();
+       if (err)
+               goto cleanup_deregister;
+
+       err = class_procfs_init();
+       if (err)
+               goto cleanup_caches;
+
+       err = lu_global_init();
+       if (err)
+               goto cleanup_class_procfs;
+
+       err = cl_global_init();
+       if (err != 0)
+               goto cleanup_lu_global;
+
+#ifdef HAVE_SERVER_SUPPORT
+       err = dt_global_init();
+       if (err != 0)
+               goto cleanup_cl_global;
+
+       err = lu_ucred_global_init();
+       if (err != 0)
+               goto cleanup_dt_global;
+#endif /* HAVE_SERVER_SUPPORT */
+
+       err = llog_info_init();
+       if (err)
+               goto cleanup_server_globals;
+
+       err = lustre_register_fs();
+       if (err)
+               goto cleanup_llog_info;
+
+       return 0;
+
+       /* Error unwind: each label undoes the step that succeeded just
+        * before the failing one, then falls through to the older ones. */
+cleanup_llog_info:
+       llog_info_fini();
+cleanup_server_globals:
+#ifdef HAVE_SERVER_SUPPORT
+       lu_ucred_global_fini();
+cleanup_dt_global:
+       dt_global_fini();
+cleanup_cl_global:
+#endif /* HAVE_SERVER_SUPPORT */
+       cl_global_fini();
+cleanup_lu_global:
+       lu_global_fini();
+cleanup_class_procfs:
+       /* NOTE(review): class_procfs_clean() is assumed to be the
+        * counterpart of class_procfs_init() - confirm it is declared in
+        * a header included here.  obd_sysctl_clean() is not called
+        * because no matching init call is visible in this function. */
+       class_procfs_clean();
+cleanup_caches:
+       obd_cleanup_caches();
+cleanup_deregister:
+       misc_deregister(&obd_psdev);
+cleanup_class_handle:
+       class_handle_cleanup();
+cleanup_uuidlist:
+       class_exit_uuidlist();
+cleanup_early:
+       /* same order as obdclass_exit(): stop the zombie handling first,
+        * then release the memory statistics */
+       obd_zombie_impexp_stop();
+#ifdef CONFIG_PROC_FS
+       lprocfs_free_stats(&obd_memory);
+#endif
+       return err;
+}
+
+/* Raise the recorded high-water mark obd_max_alloc to the value currently
+ * reported by obd_memory_sum(), if that value is larger. */
+void obd_update_maxusage(void)
 {
-        int i, err;
-#ifdef __KERNEL__
-        int lustre_register_fs(void);
+       __u64 usage;
 
-        for (i = CAPA_SITE_CLIENT; i < CAPA_SITE_MAX; i++)
-                CFS_INIT_LIST_HEAD(&capa_list[i]);
-#endif
+       /* sample the current total before taking the lock */
+       usage = obd_memory_sum();
 
-        LCONSOLE_INFO("OBD class driver, http://www.lustre.org/\n");
-        LCONSOLE_INFO("        Lustre Version: "LUSTRE_VERSION_STRING"\n");
-        LCONSOLE_INFO("        Build Version: "BUILD_VERSION"\n");
-
-        spin_lock_init(&obd_types_lock);
-        cfs_waitq_init(&obd_race_waitq);
-        obd_zombie_impexp_init();
-#ifdef LPROCFS
-        obd_memory = lprocfs_alloc_stats(OBD_STATS_NUM,
-                                         LPROCFS_STATS_FLAG_PERCPU);
-        if (obd_memory == NULL) {
-                CERROR("kmalloc of 'obd_memory' failed\n");
-                RETURN(-ENOMEM);
-        }
+       /* obd_updatemax_lock serializes updates of obd_max_alloc */
+       spin_lock(&obd_updatemax_lock);
+       if (obd_max_alloc < usage)
+               obd_max_alloc = usage;
+       spin_unlock(&obd_updatemax_lock);
+}
+EXPORT_SYMBOL(obd_update_maxusage);
 
-        lprocfs_counter_init(obd_memory, OBD_MEMORY_STAT,
-                             LPROCFS_CNTR_AVGMINMAX,
-                             "memused", "bytes");
-        lprocfs_counter_init(obd_memory, OBD_MEMORY_PAGES_STAT,
-                             LPROCFS_CNTR_AVGMINMAX,
-                             "pagesused", "pages");
-#endif
-        err = obd_init_checks();
-        if (err == -EOVERFLOW)
-                return err;
-
-        class_init_uuidlist();
-        err = class_handle_init();
-        if (err)
-                return err;
-
-        spin_lock_init(&obd_dev_lock);
-        CFS_INIT_LIST_HEAD(&obd_types);
-
-        err = cfs_psdev_register(&obd_psdev);
-        if (err) {
-                CERROR("cannot register %d err %d\n", OBD_MINOR, err);
-                return err;
-        }
+#ifdef CONFIG_PROC_FS
+/* Return the high-water mark obd_max_alloc, after first folding the
+ * current usage into it via obd_update_maxusage(). */
+__u64 obd_memory_max(void)
+{
+       __u64 peak;
 
-        /* This struct is already zerod for us (static global) */
-        for (i = 0; i < class_devno_max(); i++)
-                obd_devs[i] = NULL;
-
-        /* Default the dirty page cache cap to 1/2 of system memory.
-         * For clients with less memory, a larger fraction is needed
-         * for other purposes (mostly for BGL). */
-        if (num_physpages <= 512 << (20 - CFS_PAGE_SHIFT))
-                obd_max_dirty_pages = num_physpages / 4;
-        else
-                obd_max_dirty_pages = num_physpages / 2;
-
-        err = obd_init_caches();
-        if (err)
-                return err;
-#ifdef __KERNEL__
-        err = lu_global_init();
-        if (err)
-                return err;
-        err = class_procfs_init();
-        if (err)
-                return err;
-        err = lustre_register_fs();
-#endif
+       /* make sure the present usage is accounted for */
+       obd_update_maxusage();
+
+       /* read the maximum under the same lock its writer takes */
+       spin_lock(&obd_updatemax_lock);
+       peak = obd_max_alloc;
+       spin_unlock(&obd_updatemax_lock);
 
-        return err;
+       return peak;
 }
+#endif /* CONFIG_PROC_FS */
 
-/* liblustre doesn't call cleanup_obdclass, apparently.  we carry on in this
- * ifdef to the end of the file to cover module and versioning goo.*/
-#ifdef __KERNEL__
-static void cleanup_obdclass(void)
+static void __exit obdclass_exit(void)
 {
-        int i;
-        int lustre_unregister_fs(void);
-        __u64 memory_leaked, pages_leaked;
-        __u64 memory_max, pages_max;
-        ENTRY;
+       __u64 memory_leaked;
+       __u64 memory_max;
+       ENTRY;
 
-        lustre_unregister_fs();
+       lustre_unregister_fs();
 
-        cfs_psdev_deregister(&obd_psdev);
-        for (i = 0; i < class_devno_max(); i++) {
-                struct obd_device *obd = class_num2obd(i);
-                if (obd && obd->obd_set_up &&
-                    OBT(obd) && OBP(obd, detach)) {
-                        /* XXX should this call generic detach otherwise? */
-                        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
-                        OBP(obd, detach)(obd);
-                }
-        }
-        lu_global_fini();
+       misc_deregister(&obd_psdev);
+       llog_info_fini();
+#ifdef HAVE_SERVER_SUPPORT
+       lu_ucred_global_fini();
+       dt_global_fini();
+#endif /* HAVE_SERVER_SUPPORT */
+       cl_global_fini();
+       lu_global_fini();
 
         obd_cleanup_caches();
         obd_sysctl_clean();
@@ -641,29 +635,24 @@ static void cleanup_obdclass(void)
         class_handle_cleanup();
         class_exit_uuidlist();
         obd_zombie_impexp_stop();
+       LASSERT(list_empty(&obd_stale_exports));
 
         memory_leaked = obd_memory_sum();
-        pages_leaked = obd_pages_sum();
 
         memory_max = obd_memory_max();
-        pages_max = obd_pages_max();
 
         lprocfs_free_stats(&obd_memory);
-        if (memory_leaked > 0) {
-                CWARN("Memory leaks detected (max "LPU64", leaked "LPD64")\n",
-                      memory_max, memory_leaked);
-        }
-        if (pages_leaked > 0) {
-                CWARN("Page leaks detected (max "LPU64", leaked "LPU64")\n",
-                      pages_max, pages_leaked);
-        }
+        CDEBUG((memory_leaked) ? D_ERROR : D_INFO,
+              "obd_memory max: %llu, leaked: %llu\n",
+               memory_max, memory_leaked);
 
         EXIT;
 }
 
-MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Class Driver Build Version: " BUILD_VERSION);
+MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre Class Driver");
+MODULE_VERSION(LUSTRE_VERSION_STRING);
 MODULE_LICENSE("GPL");
 
-cfs_module(obdclass, LUSTRE_VERSION_STRING, init_obdclass, cleanup_obdclass);
-#endif
+module_init(obdclass_init);
+module_exit(obdclass_exit);