Whamcloud - gitweb
LU-8926 llite: reduce jobstats race window
[fs/lustre-release.git] / lustre / obdclass / class_obd.c
index 1c93825..cf38cde 100644 (file)
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -27,7 +23,7 @@
  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2012, Intel Corporation.
+ * Copyright (c) 2011, 2015, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
-#include <asm/atomic.h>
+
+#include <linux/user_namespace.h>
+#ifdef HAVE_UIDGID_HEADER
+# include <linux/uidgid.h>
+#endif
+#include <linux/atomic.h>
+#include <linux/list.h>
 
 #include <obd_support.h>
 #include <obd_class.h>
 #include <lnet/lnetctl.h>
 #include <lustre_debug.h>
 #include <lprocfs_status.h>
-#include <lustre/lustre_build_version.h>
-#include <libcfs/list.h>
+#include <lustre_ver.h>
 #include <cl_object.h>
 #ifdef HAVE_SERVER_SUPPORT
 # include <dt_object.h>
 #include <lustre_ioctl.h>
 #include "llog_internal.h"
 
-
 struct obd_device *obd_devs[MAX_OBD_DEVICES];
-EXPORT_SYMBOL(obd_devs);
 struct list_head obd_types;
 DEFINE_RWLOCK(obd_dev_lock);
 
-__u64 obd_max_pages = 0;
-__u64 obd_max_alloc = 0;
+#ifdef CONFIG_PROC_FS
+static __u64 obd_max_alloc;
+#else
+__u64 obd_max_alloc;
+#endif
 
 static DEFINE_SPINLOCK(obd_updatemax_lock);
 
 /* The following are visible and mutable through /proc/sys/lustre/. */
-unsigned int obd_alloc_fail_rate = 0;
-EXPORT_SYMBOL(obd_alloc_fail_rate);
 unsigned int obd_debug_peer_on_timeout;
 EXPORT_SYMBOL(obd_debug_peer_on_timeout);
 unsigned int obd_dump_on_timeout;
 EXPORT_SYMBOL(obd_dump_on_timeout);
 unsigned int obd_dump_on_eviction;
 EXPORT_SYMBOL(obd_dump_on_eviction);
-unsigned int obd_max_dirty_pages = 256;
+unsigned long obd_max_dirty_pages;
 EXPORT_SYMBOL(obd_max_dirty_pages);
-atomic_t obd_dirty_pages;
+atomic_long_t obd_dirty_pages;
 EXPORT_SYMBOL(obd_dirty_pages);
 unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT;   /* seconds */
 EXPORT_SYMBOL(obd_timeout);
@@ -99,17 +99,18 @@ EXPORT_SYMBOL(at_early_margin);
 int at_extra = 30;
 EXPORT_SYMBOL(at_extra);
 
-atomic_t obd_dirty_transit_pages;
+atomic_long_t obd_dirty_transit_pages;
 EXPORT_SYMBOL(obd_dirty_transit_pages);
 
 char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
-EXPORT_SYMBOL(obd_jobid_var);
 
-#ifdef LPROCFS
+#ifdef CONFIG_PROC_FS
 struct lprocfs_stats *obd_memory = NULL;
 EXPORT_SYMBOL(obd_memory);
 #endif
 
+char obd_jobid_node[LUSTRE_JOBID_SIZE + 1];
+
 /* Get jobid of current process by reading the environment variable
  * stored in between the "env_start" & "env_end" of task struct.
  *
@@ -124,24 +125,30 @@ EXPORT_SYMBOL(obd_memory);
  */
 int lustre_get_jobid(char *jobid)
 {
-       int jobid_len = JOBSTATS_JOBID_SIZE;
+       int jobid_len = LUSTRE_JOBID_SIZE;
+       char tmp_jobid[LUSTRE_JOBID_SIZE] = { 0 };
        int rc = 0;
        ENTRY;
 
-       memset(jobid, 0, JOBSTATS_JOBID_SIZE);
        /* Jobstats isn't enabled */
        if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0)
-               RETURN(0);
+               GOTO(out, rc = 0);
+
+       /* Whole node dedicated to a single job */
+       if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) {
+               memcpy(tmp_jobid, obd_jobid_node, LUSTRE_JOBID_SIZE);
+               GOTO(out, rc = 0);
+       }
 
        /* Use process name + fsuid as jobid */
        if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
-               snprintf(jobid, JOBSTATS_JOBID_SIZE, "%s.%u",
+               snprintf(tmp_jobid, LUSTRE_JOBID_SIZE, "%s.%u",
                         current_comm(),
                         from_kuid(&init_user_ns, current_fsuid()));
-               RETURN(0);
+               GOTO(out, rc = 0);
        }
 
-       rc = cfs_get_environ(obd_jobid_var, jobid, &jobid_len);
+       rc = cfs_get_environ(obd_jobid_var, tmp_jobid, &jobid_len);
        if (rc) {
                if (rc == -EOVERFLOW) {
                        /* For the PBS_JOBID and LOADL_STEP_ID keys (which are
@@ -165,30 +172,18 @@ int lustre_get_jobid(char *jobid)
                               obd_jobid_var, rc);
                }
        }
-       RETURN(rc);
-}
-EXPORT_SYMBOL(lustre_get_jobid);
 
-int obd_alloc_fail(const void *ptr, const char *name, const char *type,
-                  size_t size, const char *file, int line)
-{
-       if (ptr == NULL ||
-           (cfs_rand() & OBD_ALLOC_FAIL_MASK) < obd_alloc_fail_rate) {
-               CERROR("%s%salloc of %s ("LPU64" bytes) failed at %s:%d\n",
-                      ptr ? "force " :"", type, name, (__u64)size, file,
-                      line);
-               CERROR(LPU64" total bytes and "LPU64" total pages "
-                      "("LPU64" bytes) allocated by Lustre, "
-                      "%d total bytes by LNET\n",
-                      obd_memory_sum(),
-                      obd_pages_sum() << PAGE_CACHE_SHIFT,
-                      obd_pages_sum(),
-                       atomic_read(&libcfs_kmemory));
-               return 1;
-       }
-       return 0;
+out:
+       if (rc != 0)
+               RETURN(rc);
+
+       /* Only replace the job ID if it changed. */
+       if (strcmp(jobid, tmp_jobid) != 0)
+               memcpy(jobid, tmp_jobid, jobid_len);
+
+       RETURN(0);
 }
-EXPORT_SYMBOL(obd_alloc_fail);
+EXPORT_SYMBOL(lustre_get_jobid);
 
 static int class_resolve_dev_name(__u32 len, const char *name)
 {
@@ -237,7 +232,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
         }
 
         CDEBUG(D_IOCTL, "cmd = %x\n", cmd);
-        if (obd_ioctl_getdata(&buf, &len, (void *)arg)) {
+       if (obd_ioctl_getdata(&buf, &len, (void __user *)arg)) {
                 CERROR("OBD ioctl: data error\n");
                 RETURN(-EINVAL);
         }
@@ -265,24 +260,24 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                 GOTO(out, err);
         }
 
-        case OBD_GET_VERSION:
-                if (!data->ioc_inlbuf1) {
-                        CERROR("No buffer passed in ioctl\n");
-                        GOTO(out, err = -EINVAL);
-                }
+       case OBD_GET_VERSION:
+               if (!data->ioc_inlbuf1) {
+                       CERROR("No buffer passed in ioctl\n");
+                       GOTO(out, err = -EINVAL);
+               }
 
-                if (strlen(BUILD_VERSION) + 1 > data->ioc_inllen1) {
-                        CERROR("ioctl buffer too small to hold version\n");
-                        GOTO(out, err = -EINVAL);
-                }
+               if (strlen(LUSTRE_VERSION_STRING) + 1 > data->ioc_inllen1) {
+                       CERROR("ioctl buffer too small to hold version\n");
+                       GOTO(out, err = -EINVAL);
+               }
 
-                memcpy(data->ioc_bulk, BUILD_VERSION,
-                       strlen(BUILD_VERSION) + 1);
+               memcpy(data->ioc_bulk, LUSTRE_VERSION_STRING,
+                      strlen(LUSTRE_VERSION_STRING) + 1);
 
-                err = obd_ioctl_popdata((void *)arg, data, len);
-                if (err)
-                        err = -EFAULT;
-                GOTO(out, err);
+               err = obd_ioctl_popdata((void __user *)arg, data, len);
+               if (err)
+                       err = -EFAULT;
+               GOTO(out, err);
 
         case OBD_IOC_NAME2DEV: {
                 /* Resolve a device name.  This does not change the
@@ -296,7 +291,8 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                 if (dev < 0)
                         GOTO(out, err = -EINVAL);
 
-                err = obd_ioctl_popdata((void *)arg, data, sizeof(*data));
+               err = obd_ioctl_popdata((void __user *)arg, data,
+                                       sizeof(*data));
                 if (err)
                         err = -EFAULT;
                 GOTO(out, err);
@@ -330,7 +326,8 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
 
                 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
                        dev);
-                err = obd_ioctl_popdata((void *)arg, data, sizeof(*data));
+               err = obd_ioctl_popdata((void __user *)arg, data,
+                                       sizeof(*data));
                 if (err)
                         err = -EFAULT;
                 GOTO(out, err);
@@ -366,7 +363,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                          (int)index, status, obd->obd_type->typ_name,
                          obd->obd_name, obd->obd_uuid.uuid,
                         atomic_read(&obd->obd_refcount));
-                err = obd_ioctl_popdata((void *)arg, data, len);
+               err = obd_ioctl_popdata((void __user *)arg, data, len);
 
                 GOTO(out, err = 0);
         }
@@ -414,7 +411,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                 if (err)
                         GOTO(out, err);
 
-                err = obd_ioctl_popdata((void *)arg, data, len);
+               err = obd_ioctl_popdata((void __user *)arg, data, len);
                 if (err)
                         err = -EFAULT;
                 GOTO(out, err);
@@ -427,8 +424,6 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
         RETURN(err);
 } /* class_handle_ioctl */
 
-extern struct miscdevice obd_psdev;
-
 #define OBD_INIT_CHECK
 #ifdef OBD_INIT_CHECK
 static int obd_init_checks(void)
@@ -437,62 +432,60 @@ static int obd_init_checks(void)
         char buf[64];
         int len, ret = 0;
 
-        CDEBUG(D_INFO, "LPU64=%s, LPD64=%s, LPX64=%s\n", LPU64, LPD64, LPX64);
-
-        CDEBUG(D_INFO, "OBD_OBJECT_EOF = "LPX64"\n", (__u64)OBD_OBJECT_EOF);
+       CDEBUG(D_INFO, "OBD_OBJECT_EOF = %#llx\n", (__u64)OBD_OBJECT_EOF);
 
         u64val = OBD_OBJECT_EOF;
-        CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = "LPX64"\n", u64val);
+       CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = %#llx\n", u64val);
         if (u64val != OBD_OBJECT_EOF) {
-                CERROR("__u64 "LPX64"(%d) != 0xffffffffffffffff\n",
+               CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
                        u64val, (int)sizeof(u64val));
                 ret = -EINVAL;
         }
-        len = snprintf(buf, sizeof(buf), LPX64, u64val);
+       len = snprintf(buf, sizeof(buf), "%#llx", u64val);
         if (len != 18) {
-                CWARN("LPX64 wrong length! strlen(%s)=%d != 18\n", buf, len);
+               CWARN("u64 hex wrong length! strlen(%s)=%d != 18\n", buf, len);
                 ret = -EINVAL;
         }
 
         div64val = OBD_OBJECT_EOF;
-        CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = "LPX64"\n", u64val);
+       CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = %#llx\n", u64val);
         if (u64val != OBD_OBJECT_EOF) {
-                CERROR("__u64 "LPX64"(%d) != 0xffffffffffffffff\n",
+               CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
                        u64val, (int)sizeof(u64val));
                 ret = -EOVERFLOW;
         }
         if (u64val >> 8 != OBD_OBJECT_EOF >> 8) {
-                CERROR("__u64 "LPX64"(%d) != 0xffffffffffffffff\n",
+               CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n",
                        u64val, (int)sizeof(u64val));
                 return -EOVERFLOW;
         }
         if (do_div(div64val, 256) != (u64val & 255)) {
-                CERROR("do_div("LPX64",256) != "LPU64"\n", u64val, u64val &255);
+               CERROR("do_div(%#llx,256) != %llu\n", u64val, u64val & 255);
                 return -EOVERFLOW;
         }
         if (u64val >> 8 != div64val) {
-                CERROR("do_div("LPX64",256) "LPU64" != "LPU64"\n",
+               CERROR("do_div(%#llx,256) %llu != %llu\n",
                        u64val, div64val, u64val >> 8);
                 return -EOVERFLOW;
         }
-        len = snprintf(buf, sizeof(buf), LPX64, u64val);
+       len = snprintf(buf, sizeof(buf), "%#llx", u64val);
         if (len != 18) {
-                CWARN("LPX64 wrong length! strlen(%s)=%d != 18\n", buf, len);
+               CWARN("u64 hex wrong length! strlen(%s)=%d != 18\n", buf, len);
                 ret = -EINVAL;
         }
-        len = snprintf(buf, sizeof(buf), LPU64, u64val);
+       len = snprintf(buf, sizeof(buf), "%llu", u64val);
         if (len != 20) {
-                CWARN("LPU64 wrong length! strlen(%s)=%d != 20\n", buf, len);
+               CWARN("u64 wrong length! strlen(%s)=%d != 20\n", buf, len);
                 ret = -EINVAL;
         }
-        len = snprintf(buf, sizeof(buf), LPD64, u64val);
+       len = snprintf(buf, sizeof(buf), "%lld", u64val);
         if (len != 2) {
-                CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len);
+               CWARN("s64 wrong length! strlen(%s)=%d != 2\n", buf, len);
                 ret = -EINVAL;
         }
-       if ((u64val & ~CFS_PAGE_MASK) >= PAGE_CACHE_SIZE) {
-                CWARN("mask failed: u64val "LPU64" >= "LPU64"\n", u64val,
-                     (__u64)PAGE_CACHE_SIZE);
+       if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) {
+               CWARN("mask failed: u64val %llu >= %llu\n", u64val,
+                     (__u64)PAGE_SIZE);
                 ret = -EINVAL;
         }
 
@@ -502,45 +495,39 @@ static int obd_init_checks(void)
 #define obd_init_checks() do {} while(0)
 #endif
 
-extern int class_procfs_init(void);
-extern int class_procfs_clean(void);
-
-static int __init init_obdclass(void)
+static int __init obdclass_init(void)
 {
-        int i, err;
-        int lustre_register_fs(void);
+       int i, err;
 
-        for (i = CAPA_SITE_CLIENT; i < CAPA_SITE_MAX; i++)
-               INIT_LIST_HEAD(&capa_list[i]);
+       spin_lock_init(&obd_stale_export_lock);
+       INIT_LIST_HEAD(&obd_stale_exports);
+       atomic_set(&obd_stale_export_num, 0);
 
-        LCONSOLE_INFO("Lustre: Build Version: "BUILD_VERSION"\n");
+       LCONSOLE_INFO("Lustre: Build Version: "LUSTRE_VERSION_STRING"\n");
 
        spin_lock_init(&obd_types_lock);
-        obd_zombie_impexp_init();
-#ifdef LPROCFS
-        obd_memory = lprocfs_alloc_stats(OBD_STATS_NUM,
+       obd_zombie_impexp_init();
+#ifdef CONFIG_PROC_FS
+       obd_memory = lprocfs_alloc_stats(OBD_STATS_NUM,
                                         LPROCFS_STATS_FLAG_NONE |
                                         LPROCFS_STATS_FLAG_IRQ_SAFE);
-        if (obd_memory == NULL) {
-                CERROR("kmalloc of 'obd_memory' failed\n");
-                RETURN(-ENOMEM);
-        }
+       if (obd_memory == NULL) {
+               CERROR("kmalloc of 'obd_memory' failed\n");
+               RETURN(-ENOMEM);
+       }
 
-        lprocfs_counter_init(obd_memory, OBD_MEMORY_STAT,
-                             LPROCFS_CNTR_AVGMINMAX,
-                             "memused", "bytes");
-        lprocfs_counter_init(obd_memory, OBD_MEMORY_PAGES_STAT,
-                             LPROCFS_CNTR_AVGMINMAX,
-                             "pagesused", "pages");
+       lprocfs_counter_init(obd_memory, OBD_MEMORY_STAT,
+                            LPROCFS_CNTR_AVGMINMAX,
+                            "memused", "bytes");
 #endif
-        err = obd_init_checks();
-        if (err == -EOVERFLOW)
-                return err;
+       err = obd_init_checks();
+       if (err == -EOVERFLOW)
+               return err;
 
-        class_init_uuidlist();
-        err = class_handle_init();
-        if (err)
-                return err;
+       class_init_uuidlist();
+       err = class_handle_init();
+       if (err)
+               return err;
 
        INIT_LIST_HEAD(&obd_types);
 
@@ -550,30 +537,26 @@ static int __init init_obdclass(void)
                return err;
        }
 
-        /* This struct is already zeroed for us (static global) */
-        for (i = 0; i < class_devno_max(); i++)
-                obd_devs[i] = NULL;
+       /* obd_devs is a file-scope array, so it is already zeroed for us */
+       for (i = 0; i < class_devno_max(); i++)
+               obd_devs[i] = NULL;
 
-        /* Default the dirty page cache cap to 1/2 of system memory.
-         * For clients with less memory, a larger fraction is needed
-         * for other purposes (mostly for BGL). */
-       if (totalram_pages <= 512 << (20 - PAGE_CACHE_SHIFT))
+       /* Default the dirty page cache cap to 1/2 of system memory.
+        * Clients with at most 512MB of RAM are capped at 1/4 instead, as
+        * they need a larger fraction for other purposes (mostly for BGL). */
+       if (totalram_pages <= 512 << (20 - PAGE_SHIFT))
                obd_max_dirty_pages = totalram_pages / 4;
        else
                obd_max_dirty_pages = totalram_pages / 2;
 
-        err = obd_init_caches();
-        if (err)
-                return err;
-        err = class_procfs_init();
-        if (err)
-                return err;
-
-       err = lu_global_init();
+       err = obd_init_caches();
+       if (err)
+               return err;
+       err = class_procfs_init();
        if (err)
                return err;
 
-       err = lu_capainfo_init();
+       err = lu_global_init();
        if (err)
                return err;
 
@@ -595,63 +578,45 @@ static int __init init_obdclass(void)
        if (err)
                return err;
 
-        err = lustre_register_fs();
+       err = lustre_register_fs();
 
-        return err;
+       return err;
 }
 
 void obd_update_maxusage(void)
 {
-       __u64 max1, max2;
+       __u64 max;
 
-       max1 = obd_pages_sum();
-       max2 = obd_memory_sum();
+       max = obd_memory_sum();
 
        spin_lock(&obd_updatemax_lock);
-       if (max1 > obd_max_pages)
-               obd_max_pages = max1;
-       if (max2 > obd_max_alloc)
-               obd_max_alloc = max2;
+       if (max > obd_max_alloc)
+               obd_max_alloc = max;
        spin_unlock(&obd_updatemax_lock);
 }
 EXPORT_SYMBOL(obd_update_maxusage);
 
-#ifdef LPROCFS
+#ifdef CONFIG_PROC_FS
 __u64 obd_memory_max(void)
 {
        __u64 ret;
 
+       obd_update_maxusage();
        spin_lock(&obd_updatemax_lock);
        ret = obd_max_alloc;
        spin_unlock(&obd_updatemax_lock);
 
        return ret;
 }
-EXPORT_SYMBOL(obd_memory_max);
+#endif /* CONFIG_PROC_FS */
 
-__u64 obd_pages_max(void)
+static void __exit obdclass_exit(void)
 {
-       __u64 ret;
-
-       spin_lock(&obd_updatemax_lock);
-       ret = obd_max_pages;
-       spin_unlock(&obd_updatemax_lock);
-
-       return ret;
-}
-EXPORT_SYMBOL(obd_pages_max);
-#endif /* LPROCFS */
-
-/* liblustre doesn't call cleanup_obdclass, apparently.  we carry on in this
- * ifdef to the end of the file to cover module and versioning goo.*/
-static void cleanup_obdclass(void)
-{
-        int lustre_unregister_fs(void);
-        __u64 memory_leaked, pages_leaked;
-        __u64 memory_max, pages_max;
-        ENTRY;
+       __u64 memory_leaked;
+       __u64 memory_max;
+       ENTRY;
 
-        lustre_unregister_fs();
+       lustre_unregister_fs();
 
        misc_deregister(&obd_psdev);
        llog_info_fini();
@@ -660,7 +625,6 @@ static void cleanup_obdclass(void)
        dt_global_fini();
 #endif /* HAVE_SERVER_SUPPORT */
        cl_global_fini();
-       lu_capainfo_fini();
        lu_global_fini();
 
         obd_cleanup_caches();
@@ -671,26 +635,24 @@ static void cleanup_obdclass(void)
         class_handle_cleanup();
         class_exit_uuidlist();
         obd_zombie_impexp_stop();
+       LASSERT(list_empty(&obd_stale_exports));
 
         memory_leaked = obd_memory_sum();
-        pages_leaked = obd_pages_sum();
 
         memory_max = obd_memory_max();
-        pages_max = obd_pages_max();
 
         lprocfs_free_stats(&obd_memory);
         CDEBUG((memory_leaked) ? D_ERROR : D_INFO,
-               "obd_memory max: "LPU64", leaked: "LPU64"\n",
+              "obd_memory max: %llu, leaked: %llu\n",
                memory_max, memory_leaked);
-        CDEBUG((pages_leaked) ? D_ERROR : D_INFO,
-               "obd_memory_pages max: "LPU64", leaked: "LPU64"\n",
-               pages_max, pages_leaked);
 
         EXIT;
 }
 
-MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Class Driver Build Version: " BUILD_VERSION);
+MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre Class Driver");
+MODULE_VERSION(LUSTRE_VERSION_STRING);
 MODULE_LICENSE("GPL");
 
-cfs_module(obdclass, LUSTRE_VERSION_STRING, init_obdclass, cleanup_obdclass);
+module_init(obdclass_init);
+module_exit(obdclass_exit);