Whamcloud - gitweb
fix changelog
[fs/lustre-release.git] / lustre / lvfs / lvfs_lib.c
index 5725e25..009c848 100644 (file)
@@ -1,26 +1,43 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  lustre/lvfs/lvfs_lib.c
- *  Lustre filesystem abstraction routines
+ * GPL HEADER START
  *
- *  Copyright (C) 2007 Cluster File Systems, Inc.
- *   Author: Andreas Dilger <adilger@clusterfs.com>
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- *   This file is part of Lustre, http://www.lustre.org.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lvfs/lvfs_lib.c
+ *
+ * Lustre filesystem abstraction routines
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
  */
 #ifdef __KERNEL__
 #include <linux/module.h>
 #include <lustre_lib.h>
 #include <lprocfs_status.h>
 
+__u64 obd_max_pages = 0;
+__u64 obd_max_alloc = 0;
+
+#ifdef __KERNEL__
+struct lprocfs_stats *obd_memory = NULL;
+spinlock_t obd_updatemax_lock = SPIN_LOCK_UNLOCKED;
+/* refine later and change to seqlock or similar from libcfs */
+#else
+__u64 obd_alloc;
+__u64 obd_pages;
+#endif
+
 unsigned int obd_fail_val = 0;
-unsigned long obd_fail_loc = 0;
+unsigned int obd_fail_loc = 0;
 unsigned int obd_alloc_fail_rate = 0;
 
 int obd_alloc_fail(const void *ptr, const char *name, const char *type,
@@ -49,82 +78,102 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
                        obd_memory_sum(),
                        obd_pages_sum() << CFS_PAGE_SHIFT,
                        obd_pages_sum(),
-                       atomic_read(&libcfs_kmemory));                
-                return 1;
+                       atomic_read(&libcfs_kmemory));
+               return 1;
         }
         return 0;
 }
 EXPORT_SYMBOL(obd_alloc_fail);
 
-int __obd_fail_check_set(__u32 id, __u32 value, int set)
+#ifdef __KERNEL__
+void obd_update_maxusage()
 {
-        static atomic_t obd_fail_count = ATOMIC_INIT(0);
+        __u64 max1, max2;
 
-        LASSERT(!(id & OBD_FAIL_ONCE));
+        max1 = obd_pages_sum();
+        max2 = obd_memory_sum();
 
-        if ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE)) ==
-            (OBD_FAILED | OBD_FAIL_ONCE)) {
-                atomic_set(&obd_fail_count, 0); /* paranoia */
-                return 0;
-        }
+        spin_lock(&obd_updatemax_lock);
+        if (max1 > obd_max_pages)
+                obd_max_pages = max1;
+        if (max2 > obd_max_alloc)
+                obd_max_alloc = max2;
+        spin_unlock(&obd_updatemax_lock);
+}
 
-        /* Fail 1/obd_fail_val times */
-        if (obd_fail_loc & OBD_FAIL_RAND) {
-                if (obd_fail_val < 2 || ll_rand() % obd_fail_val > 0)
-                        return 0;
-        }
+__u64 obd_memory_max(void)
+{
+        __u64 ret;
 
-        /* Skip the first obd_fail_val, then fail */
-        if (obd_fail_loc & OBD_FAIL_SKIP) {
-                if (atomic_inc_return(&obd_fail_count) <= obd_fail_val)
-                        return 0;
-        }
+        spin_lock(&obd_updatemax_lock);
+        ret = obd_max_alloc;
+        spin_unlock(&obd_updatemax_lock);
 
-        /* Fail obd_fail_val times, overridden by FAIL_ONCE */
-        if (obd_fail_loc & OBD_FAIL_SOME &&
-            (!(obd_fail_loc & OBD_FAIL_ONCE) || obd_fail_val <= 1)) { 
-                int count = atomic_inc_return(&obd_fail_count);
+        return ret;
+}
 
-                if (count >= obd_fail_val) {
-                        set_bit(OBD_FAIL_ONCE_BIT, &obd_fail_loc);
-                        atomic_set(&obd_fail_count, 0);
-                        /* we are lost race to increase obd_fail_count */
-                        if (count > obd_fail_val)
-                                return 0;
-                }
-        }
+__u64 obd_pages_max(void)
+{
+        __u64 ret;
 
-        if ((set == OBD_FAIL_LOC_ORSET || set == OBD_FAIL_LOC_RESET) &&
-            (value & OBD_FAIL_ONCE))
-                set_bit(OBD_FAIL_ONCE_BIT, &obd_fail_loc);
+        spin_lock(&obd_updatemax_lock);
+        ret = obd_max_pages;
+        spin_unlock(&obd_updatemax_lock);
 
-        /* Lost race to set OBD_FAILED_BIT. */
-        if (test_and_set_bit(OBD_FAILED_BIT, &obd_fail_loc)) {
-                /* If OBD_FAIL_ONCE is valid, only one process can fail,
-                 * otherwise multi-process can fail at the same time. */
-                if (obd_fail_loc & OBD_FAIL_ONCE)
-                        return 0;
-        }
+        return ret;
+}
 
-        switch (set) {
-                case OBD_FAIL_LOC_NOSET:
-                        break;
-                case OBD_FAIL_LOC_ORSET:
-                        obd_fail_loc |= value & ~(OBD_FAILED | OBD_FAIL_ONCE);
-                        break;
-                case OBD_FAIL_LOC_RESET:
-                        obd_fail_loc = value;
-                        break;
-                default:
-                        LASSERTF(0, "called with bad set %u\n", set);
-                        break;
-        }
+EXPORT_SYMBOL(obd_update_maxusage);
+EXPORT_SYMBOL(obd_pages_max);
+EXPORT_SYMBOL(obd_memory_max);
+EXPORT_SYMBOL(obd_memory);
 
-        return 1;
-}
-EXPORT_SYMBOL(__obd_fail_check_set);
+#endif
 
 #ifdef LPROCFS
+__s64 lprocfs_read_helper(struct lprocfs_counter *lc,
+                          enum lprocfs_fields_flags field)
+{
+        __s64 ret = 0; /* returned unchanged when field matches no case */
+        int centry; /* la_entry value sampled before the field is read */
+
+        if (!lc) /* no counter supplied: report 0 */
+                RETURN(0);
+        do {
+                centry = atomic_read(&lc->lc_cntl.la_entry);
+                /* re-evaluated on every pass; only the last value is returned */
+                switch (field) {
+                        case LPROCFS_FIELDS_FLAGS_CONFIG:
+                                ret = lc->lc_config;
+                                break;
+                        case LPROCFS_FIELDS_FLAGS_SUM:
+                                ret = lc->lc_sum + lc->lc_sum_irq; /* both sums combined */
+                                break;
+                        case LPROCFS_FIELDS_FLAGS_MIN:
+                                ret = lc->lc_min;
+                                break;
+                        case LPROCFS_FIELDS_FLAGS_MAX:
+                                ret = lc->lc_max;
+                                break;
+                        case LPROCFS_FIELDS_FLAGS_AVG:
+                                ret = (lc->lc_max - lc->lc_min)/2; /* NOTE(review): half the max-min spread; confirm */
+                                break;
+                        case LPROCFS_FIELDS_FLAGS_SUMSQUARE:
+                                ret = lc->lc_sumsquare;
+                                break;
+                        case LPROCFS_FIELDS_FLAGS_COUNT:
+                                ret = lc->lc_count;
+                                break;
+                        default:
+                                break;
+                };
+        } while (centry != atomic_read(&lc->lc_cntl.la_entry) &&
+                 centry != atomic_read(&lc->lc_cntl.la_exit));
+        /* NOTE(review): loop repeats only if BOTH differ; confirm && vs || */
+        RETURN(ret);
+}
+EXPORT_SYMBOL(lprocfs_read_helper);
+
 void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
                                        long amount)
 {
@@ -143,9 +192,12 @@ void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
         percpu_cntr->lc_count++;
 
         if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) {
+                /* see comment in lprocfs_counter_sub */
+                LASSERT(!cfs_in_interrupt());
+
                 percpu_cntr->lc_sum += amount;
                 if (percpu_cntr->lc_config & LPROCFS_CNTR_STDDEV)
-                        percpu_cntr->lc_sumsquare += (__s64)amount * amount;
+                        percpu_cntr->lc_sumsquare += (__u64)amount * amount;
                 if (amount < percpu_cntr->lc_min)
                         percpu_cntr->lc_min = amount;
                 if (amount > percpu_cntr->lc_max)
@@ -171,8 +223,20 @@ void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx,
 
         percpu_cntr = &(stats->ls_percpu[smp_id]->lp_cntr[idx]);
         atomic_inc(&percpu_cntr->lc_cntl.la_entry);
-        if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX)
-                percpu_cntr->lc_sum -= amount;
+        if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) {
+                /*
+                 * Currently lprocfs_counter_add() can only be called in
+                 * thread context. Memory is sometimes freed from RCU
+                 * callbacks, which call lprocfs_counter_sub() and may run
+                 * in softirq context. That is the only case in which we are
+                 * in softirq context here, so use a separate counter for it.
+                 * bz20650.
+                 */
+                if (cfs_in_interrupt())
+                        percpu_cntr->lc_sum_irq -= amount;
+                else
+                        percpu_cntr->lc_sum -= amount;
+        }
         atomic_inc(&percpu_cntr->lc_cntl.la_exit);
         lprocfs_stats_unlock(stats);
 }
@@ -182,3 +246,49 @@ EXPORT_SYMBOL(lprocfs_counter_sub);
 EXPORT_SYMBOL(obd_fail_loc);
 EXPORT_SYMBOL(obd_alloc_fail_rate);
 EXPORT_SYMBOL(obd_fail_val);
+
+int obd_fail_check(__u32 id) /* 1 (and OBD_FAILED set) when it fires, else 0 */
+{
+        static int count = 0; /* shared by the SKIP and SOME branches below */
+        if (likely((obd_fail_loc & OBD_FAIL_MASK_LOC) !=
+                   (id & OBD_FAIL_MASK_LOC)))
+                return 0; /* location bits of id do not match obd_fail_loc */
+        /* OBD_FAILED and OBD_FAIL_ONCE are both already set: do not fail again */
+        if ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE)) ==
+            (OBD_FAILED | OBD_FAIL_ONCE)) {
+                count = 0; /* paranoia */
+                return 0;
+        }
+        /* RAND: fire only when ll_rand() is a multiple of obd_fail_val */
+        if (obd_fail_loc & OBD_FAIL_RAND) {
+                if (obd_fail_val < 2) /* also keeps the modulo divisor non-zero */
+                        return 0;
+                if (ll_rand() % obd_fail_val > 0)
+                        return 0;
+        }
+        /* SKIP: return 0 until count reaches obd_fail_val, then reset and fire */
+        if (obd_fail_loc & OBD_FAIL_SKIP) {
+                count++;
+                if (count < obd_fail_val)
+                        return 0;
+                count = 0;
+        }
+        /* SOME: fire on each call; the obd_fail_val-th one also sets FAIL_ONCE */
+        /* Overridden by FAIL_ONCE */
+        if (obd_fail_loc & OBD_FAIL_SOME) {
+                count++;
+                if (count >= obd_fail_val) {
+                        count = 0;
+                        /* Don't fail anymore */
+                        obd_fail_loc |= OBD_FAIL_ONCE;
+                }
+        }
+
+        obd_fail_loc |= OBD_FAILED; /* record that this location has fired */
+        /* Handle old checks that OR in this */
+        if (id & OBD_FAIL_ONCE)
+                obd_fail_loc |= OBD_FAIL_ONCE;
+
+        return 1;
+}
+EXPORT_SYMBOL(obd_fail_check);