Whamcloud - gitweb
b=22334 swap the declarations for OBD_FAIL_MDS_READLINK_EPROTO and OBD_FAIL_MDS_OPEN_...
[fs/lustre-release.git] / lustre / include / obd_support.h
index 1b31dd4..ae14668 100644 (file)
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * GPL HEADER START
  *
- *   This file is part of Lustre, http://www.lustre.org.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
  */
 
 #ifndef _OBD_SUPPORT
 #define _OBD_SUPPORT
 
-#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
+#include <lvfs.h>
+#include <lprocfs_status.h>
+
+#if defined(__linux__)
+#include <linux/obd_support.h>
+#elif defined(__APPLE__)
+#include <darwin/obd_support.h>
+#elif defined(__WINNT__)
+#include <winnt/obd_support.h>
+#else
+#error Unsupported operating system.
+#endif
 
 /* global variables */
-extern atomic_t obd_memory;
-extern int obd_memmax;
-extern unsigned int obd_fail_loc;
+extern struct lprocfs_stats *obd_memory;
+enum {
+        OBD_MEMORY_STAT = 0,
+        OBD_MEMORY_PAGES_STAT = 1,
+        OBD_STATS_NUM,
+};
+
+enum {
+        OBD_FAIL_LOC_NOSET      = 0,
+        OBD_FAIL_LOC_ORSET      = 1,
+        OBD_FAIL_LOC_RESET      = 2
+};
+
+extern unsigned long obd_fail_loc;
+extern unsigned int obd_fail_val;
 extern unsigned int obd_debug_peer_on_timeout;
 extern unsigned int obd_dump_on_timeout;
 extern unsigned int obd_dump_on_eviction;
+/* obd_timeout should only be used for recovery, not for
+   networking / disk / timings affected by load (use Adaptive Timeouts) */
 extern unsigned int obd_timeout;          /* seconds */
-#define PING_INTERVAL max(obd_timeout / 4, 1U)
-#define RECONNECT_INTERVAL max(obd_timeout / 10, 10U)
-#define LDLM_TIMEOUT_DEFAULT 20
-extern unsigned int ldlm_timeout;
-extern unsigned int obd_health_check_timeout;
+extern unsigned int ldlm_timeout;         /* seconds */
+extern unsigned int at_min;
+extern unsigned int at_max;
+extern unsigned int at_history;
+extern int at_early_margin;
+extern int at_extra;
 extern unsigned int obd_sync_filter;
 extern unsigned int obd_max_dirty_pages;
-extern atomic_t obd_dirty_pages;
+extern cfs_atomic_t obd_dirty_pages;
+extern cfs_atomic_t obd_dirty_transit_pages;
 extern cfs_waitq_t obd_race_waitq;
 extern int obd_race_state;
-
+extern unsigned int obd_alloc_fail_rate;
+
+int __obd_fail_check_set(__u32 id, __u32 value, int set);
+int __obd_fail_timeout_set(__u32 id, __u32 value, int ms, int set);
+
+/* lvfs.c */
+int obd_alloc_fail(const void *ptr, const char *name, const char *type,
+                   size_t size, const char *file, int line);
+
+/* Some hash init argument constants */
+#define HASH_POOLS_CUR_BITS 3
+#define HASH_POOLS_MAX_BITS 7
+#define HASH_UUID_CUR_BITS 7
+#define HASH_UUID_MAX_BITS 12
+#define HASH_NID_CUR_BITS 7
+#define HASH_NID_MAX_BITS 12
+#define HASH_NID_STATS_CUR_BITS 7
+#define HASH_NID_STATS_MAX_BITS 12
+#define HASH_LQS_CUR_BITS 7
+#define HASH_LQS_MAX_BITS 12
+#define HASH_CONN_CUR_BITS 5
+#define HASH_CONN_MAX_BITS 15
+#define HASH_EXP_LOCK_CUR_BITS  7
+#define HASH_EXP_LOCK_MAX_BITS  16
+
+/* Timeout definitions */
+#define OBD_TIMEOUT_DEFAULT             100
+#define LDLM_TIMEOUT_DEFAULT            20
+#define MDS_LDLM_TIMEOUT_DEFAULT        6
+/* Time to wait for all clients to reconnect during recovery */
+/* Should be very conservative; must catch the first reconnect after reboot */
+#define OBD_RECOVERY_FACTOR (3) /* times obd_timeout */
+/* Change recovery-small 26b time if you change this */
+#define PING_INTERVAL max(obd_timeout / 4, 1U)
+/* Client may skip 1 ping; we must wait at least 2.5. But for multiple
+ * failover targets the client only pings one server at a time, and pings
+ * can be lost on a loaded network. Since eviction has serious consequences,
+ * and there's no urgent need to evict a client just because it's idle, we
+ * should be very conservative here. */
+#define PING_EVICT_TIMEOUT (PING_INTERVAL * 6)
+#define DISK_TIMEOUT 50          /* Beyond this we warn about disk speed */
+#define CONNECTION_SWITCH_MIN 5U /* Connection switching rate limiter */
+ /* Max connect interval for nonresponsive servers; ~50s to avoid building up
+    connect requests in the LND queues, but within obd_timeout so we don't
+    miss the recovery window */
+#define CONNECTION_SWITCH_MAX min(50U, max(CONNECTION_SWITCH_MIN,obd_timeout))
+#define CONNECTION_SWITCH_INC 5  /* Connection timeout backoff */
+#ifndef CRAY_XT3
+/* In general this should be low to have quick detection of a system
+   running on a backup server. (If it's too low, import_select_connection
+   will increase the timeout anyhow.)  */
+#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/20)
+#else
+/* ...but for very large systems (e.g. CRAY) we need to keep the initial
+   connect t.o. high (bz 10803), because they will nearly ALWAYS be doing the
+   connects for the first time (clients "reboot" after every process, so no
+   chance to generate adaptive timeout data. */
+#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/2)
+#endif
+/* The max delay between connects is SWITCH_MAX + SWITCH_INC + INITIAL */
+#define RECONNECT_DELAY_MAX (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC + \
+                             INITIAL_CONNECT_TIMEOUT)
+#define LONG_UNLINK 300          /* Unlink should happen before now */
+
+/**
+ * Time interval of shrink, if the client is "idle" more than this interval,
+ * then the ll_grant thread will return the requested grant space to filter
+ */
+#define GRANT_SHRINK_INTERVAL            1200/*20 minutes*/
 
 #define OBD_FAIL_MDS                     0x100
 #define OBD_FAIL_MDS_HANDLE_UNPACK       0x101
@@ -100,6 +210,29 @@ extern int obd_race_state;
 #define OBD_FAIL_MDS_SETXATTR_WRITE      0x134
 #define OBD_FAIL_MDS_FS_SETUP            0x135
 #define OBD_FAIL_MDS_RESEND              0x136
+#define OBD_FAIL_MDS_LLOG_CREATE_FAILED  0x137
+#define OBD_FAIL_MDS_LOV_SYNC_RACE       0x138
+#define OBD_FAIL_MDS_OSC_PRECREATE       0x139
+#define OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT   0x13a
+#define OBD_FAIL_MDS_CLOSE_NET_REP       0x13b
+#define OBD_FAIL_MDS_BLOCK_QUOTA_REQ     0x13c
+#define OBD_FAIL_MDS_DROP_QUOTA_REQ      0x13d
+#define OBD_FAIL_MDS_REMOVE_COMMON_EA    0x13e
+#define OBD_FAIL_MDS_ALLOW_COMMON_EA_SETTING   0x13f
+#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD    0x140
+#define OBD_FAIL_MDS_LOV_PREP_CREATE     0x141
+#define OBD_FAIL_MDS_REINT_DELAY         0x142
+#define OBD_FAIL_MDS_READLINK_EPROTO     0x143
+#define OBD_FAIL_MDS_OPEN_WAIT_CREATE    0x144
+
+/* CMD */
+#define OBD_FAIL_MDS_IS_SUBDIR_NET       0x180
+#define OBD_FAIL_MDS_IS_SUBDIR_PACK      0x181
+#define OBD_FAIL_MDS_SET_INFO_NET        0x182
+#define OBD_FAIL_MDS_WRITEPAGE_NET       0x183
+#define OBD_FAIL_MDS_WRITEPAGE_PACK      0x184
+#define OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS 0x185
+#define OBD_FAIL_MDS_GET_INFO_NET        0x186
 
 #define OBD_FAIL_OST                     0x200
 #define OBD_FAIL_OST_CONNECT_NET         0x201
@@ -119,7 +252,7 @@ extern int obd_race_state;
 #define OBD_FAIL_OST_BRW_READ_BULK       0x20f
 #define OBD_FAIL_OST_SYNC_NET            0x210
 #define OBD_FAIL_OST_ALL_REPLY_NET       0x211
-#define OBD_FAIL_OST_ALL_REQUESTS_NET    0x212
+#define OBD_FAIL_OST_ALL_REQUEST_NET     0x212
 #define OBD_FAIL_OST_LDLM_REPLY_NET      0x213
 #define OBD_FAIL_OST_BRW_PAUSE_BULK      0x214
 #define OBD_FAIL_OST_ENOSPC              0x215
@@ -132,6 +265,15 @@ extern int obd_race_state;
 #define OBD_FAIL_OST_BRW_SIZE            0x21c
 #define OBD_FAIL_OST_DROP_REQ            0x21d
 #define OBD_FAIL_OST_SETATTR_CREDITS     0x21e
+#define OBD_FAIL_OST_HOLD_WRITE_RPC      0x21f
+#define OBD_FAIL_OST_BRW_WRITE_BULK2     0x220
+#define OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221
+#define OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222
+#define OBD_FAIL_OST_PAUSE_CREATE        0x223
+#define OBD_FAIL_OST_BRW_PAUSE_PACK      0x224
+#define OBD_FAIL_OST_CONNECT_NET2        0x225
+#define OBD_FAIL_OST_NOMEM               0x226
+#define OBD_FAIL_OST_BRW_PAUSE_BULK2     0x227
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
@@ -149,6 +291,18 @@ extern int obd_race_state;
 #define OBD_FAIL_LDLM_RECOV_CLIENTS      0x30d
 #define OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT 0x30e
 #define OBD_FAIL_LDLM_GLIMPSE            0x30f
+#define OBD_FAIL_LDLM_CANCEL_RACE        0x310
+#define OBD_FAIL_LDLM_CANCEL_EVICT_RACE  0x311
+#define OBD_FAIL_LDLM_PAUSE_CANCEL       0x312
+#define OBD_FAIL_LDLM_CLOSE_THREAD       0x313
+#define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE  0x314
+#define OBD_FAIL_LDLM_CP_CB_WAIT         0x315
+#define OBD_FAIL_LDLM_OST_FAIL_RACE      0x316
+#define OBD_FAIL_LDLM_INTR_CP_AST        0x317
+#define OBD_FAIL_LDLM_CP_BL_RACE         0x318
+
+/* LOCKLESS IO */
+#define OBD_FAIL_LDLM_SET_CONTENTION     0x385
 
 #define OBD_FAIL_OSC                     0x400
 #define OBD_FAIL_OSC_BRW_READ_BULK       0x401
@@ -160,6 +314,12 @@ extern int obd_race_state;
 #define OBD_FAIL_OSC_SHUTDOWN            0x407
 #define OBD_FAIL_OSC_CHECKSUM_RECEIVE    0x408
 #define OBD_FAIL_OSC_CHECKSUM_SEND       0x409
+#define OBD_FAIL_OSC_BRW_PREP_REQ2       0x40a
+#define OBD_FAIL_OSC_CONNECT_CKSUM       0x40b
+#define OBD_FAIL_OSC_CKSUM_ADLER_ONLY    0x40c
+#define OBD_FAIL_OSC_DIO_PAUSE           0x40d
+#define OBD_FAIL_OSC_OBJECT_CONTENTION   0x40e
+#define OBD_FAIL_OSC_CP_CANCEL_RACE      0x40f
 
 #define OBD_FAIL_PTLRPC                  0x500
 #define OBD_FAIL_PTLRPC_ACK              0x501
@@ -168,12 +328,25 @@ extern int obd_race_state;
 #define OBD_FAIL_PTLRPC_BULK_PUT_NET     0x504
 #define OBD_FAIL_PTLRPC_DROP_RPC         0x505
 #define OBD_FAIL_PTLRPC_DELAY_SEND       0x506
+#define OBD_FAIL_PTLRPC_DELAY_RECOV      0x507
+#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB   0x508
+#define OBD_FAIL_PTLRPC_PAUSE_REQ        0x50a
+#define OBD_FAIL_PTLRPC_PAUSE_REP        0x50c
+#define OBD_FAIL_PTLRPC_IMP_DEACTIVE     0x50d
+#define OBD_FAIL_PTLRPC_DUMP_LOG         0x50e
+#define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f
+#define OBD_FAIL_PTLRPC_LONG_BULK_UNLINK 0x510
+#define OBD_FAIL_PTLRPC_HPREQ_TIMEOUT    0x511
+#define OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT  0x512
+#define OBD_FAIL_PTLRPC_DROP_REQ_OPC     0x513
 
 #define OBD_FAIL_OBD_PING_NET            0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
 #define OBD_FAIL_OBD_LOGD_NET            0x602
 #define OBD_FAIL_OBD_QC_CALLBACK_NET     0x603
 #define OBD_FAIL_OBD_DQACQ               0x604
+#define OBD_FAIL_OBD_LLOG_SETUP          0x605
+#define OBD_FAIL_OBD_LOG_CANCEL_REP      0x606
 
 #define OBD_FAIL_TGT_REPLY_NET           0x700
 #define OBD_FAIL_TGT_CONN_RACE           0x701
@@ -181,82 +354,163 @@ extern int obd_race_state;
 #define OBD_FAIL_TGT_DELAY_CONNECT       0x703
 #define OBD_FAIL_TGT_DELAY_RECONNECT     0x704
 #define OBD_FAIL_TGT_DELAY_PRECREATE     0x705
+#define OBD_FAIL_TGT_TOOMANY_THREADS     0x706
+#define OBD_FAIL_TGT_REPLAY_DROP         0x707
+#define OBD_FAIL_TGT_FAKE_EXP            0x708
+#define OBD_FAIL_TGT_REPLAY_DELAY        0x709
+#define OBD_FAIL_TGT_LAST_REPLAY         0x710
+#define OBD_FAIL_TGT_CLIENT_ADD          0x711
 
 #define OBD_FAIL_MDC_REVALIDATE_PAUSE    0x800
+#define OBD_FAIL_MDC_ENQUEUE_PAUSE       0x801
+#define OBD_FAIL_MDC_OLD_EXT_FLAGS       0x802
+#define OBD_FAIL_MDC_GETATTR_ENQUEUE     0x803
 
 #define OBD_FAIL_MGS                     0x900
 #define OBD_FAIL_MGS_ALL_REQUEST_NET     0x901
 #define OBD_FAIL_MGS_ALL_REPLY_NET       0x902
-#define OBD_FAIL_MGC_PROCESS_LOG         0x903
-#define OBD_FAIL_MGS_SLOW_REQUEST_NET    0x904
-#define OBD_FAIL_MGS_SLOW_TARGET_REG     0x905
+#define OBD_FAIL_MGC_PAUSE_PROCESS_LOG   0x903
+#define OBD_FAIL_MGS_PAUSE_REQ           0x904
+#define OBD_FAIL_MGS_PAUSE_TARGET_REG    0x905
 
-#define OBD_FAIL_QUOTA_QD_COUNT_32BIT    0xA00
+#define OBD_FAIL_QUOTA_RET_QDATA         0xA02
+#define OBD_FAIL_QUOTA_DELAY_REL         0xA03
+#define OBD_FAIL_QUOTA_DELAY_SD          0xA04
 
 #define OBD_FAIL_LPROC_REMOVE            0xB00
 
-/* preparation for a more advanced failure testbed (not functional yet) */
-#define OBD_FAIL_MASK_SYS    0x0000FF00
-#define OBD_FAIL_MASK_LOC    (0x000000FF | OBD_FAIL_MASK_SYS)
-#define OBD_FAIL_ONCE        0x80000000
-#define OBD_FAILED           0x40000000
-
-#define OBD_FAIL_CHECK(id)   (((obd_fail_loc & OBD_FAIL_MASK_LOC) ==           \
-                              ((id) & OBD_FAIL_MASK_LOC)) &&                   \
-                              ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!= \
-                                (OBD_FAILED | OBD_FAIL_ONCE)))
-
-#define OBD_FAIL_CHECK_ONCE(id)                                              \
-({      int _ret_ = 0;                                                       \
-        if (OBD_FAIL_CHECK(id)) {                                            \
-                CERROR("*** obd_fail_loc=%x ***\n", id);                     \
-                obd_fail_loc |= OBD_FAILED;                                  \
-                if ((id) & OBD_FAIL_ONCE)                                    \
-                        obd_fail_loc |= OBD_FAIL_ONCE;                       \
-                _ret_ = 1;                                                   \
-        }                                                                    \
-        _ret_;                                                               \
-})
+#define OBD_FAIL_GENERAL_ALLOC           0xC00
 
-#define OBD_FAIL_RETURN(id, ret)                                             \
-do {                                                                         \
-        if (OBD_FAIL_CHECK_ONCE(id)) {                                       \
-                RETURN(ret);                                                 \
-        }                                                                    \
-} while(0)
+#define OBD_FAIL_SEQ                     0x1000
+#define OBD_FAIL_SEQ_QUERY_NET           0x1001
 
-#define OBD_FAIL_TIMEOUT(id, secs)                                           \
-do {                                                                         \
-        if (OBD_FAIL_CHECK_ONCE(id)) {                                       \
-                CERROR("obd_fail_timeout id %x sleeping for %d secs\n",      \
-                       (id), (secs));                                        \
-                set_current_state(TASK_UNINTERRUPTIBLE);                     \
-                cfs_schedule_timeout(CFS_TASK_UNINT,                         \
-                                    cfs_time_seconds(secs));                 \
-                set_current_state(TASK_RUNNING);                             \
-                CERROR("obd_fail_timeout id %x awake\n", (id));              \
-       }                                                                     \
-} while(0)
+#define OBD_FAIL_FLD                     0x1100
+#define OBD_FAIL_FLD_QUERY_NET           0x1101
+
+#define OBD_FAIL_SEC_CTX                 0x1200
+#define OBD_FAIL_SEC_CTX_INIT_NET        0x1201
+#define OBD_FAIL_SEC_CTX_INIT_CONT_NET   0x1202
+#define OBD_FAIL_SEC_CTX_FINI_NET        0x1203
+#define OBD_FAIL_SEC_CTX_HDL_PAUSE       0x1204
+
+#define OBD_FAIL_LLOG                               0x1300
+#define OBD_FAIL_LLOG_ORIGIN_CONNECT_NET            0x1301
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CREATE_NET      0x1302
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_DESTROY_NET     0x1303
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_READ_HEADER_NET 0x1304
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_NEXT_BLOCK_NET  0x1305
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_PREV_BLOCK_NET  0x1306
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_WRITE_REC_NET   0x1307
+#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CLOSE_NET       0x1308
+#define OBD_FAIL_LLOG_CATINFO_NET                   0x1309
+
+
+/* Failure injection control */
+#define OBD_FAIL_MASK_SYS    0x0000FF00
+#define OBD_FAIL_MASK_LOC   (0x000000FF | OBD_FAIL_MASK_SYS)
+
+#define OBD_FAILED_BIT       30
+/* OBD_FAILED is 0x40000000 */
+#define OBD_FAILED          (1 << OBD_FAILED_BIT)
+
+#define OBD_FAIL_ONCE_BIT    31
+/* OBD_FAIL_ONCE is 0x80000000 */
+#define OBD_FAIL_ONCE       (1 << OBD_FAIL_ONCE_BIT)
+
+/* The following flags aren't made to be combined */
+#define OBD_FAIL_SKIP        0x20000000 /* skip N times then fail */
+#define OBD_FAIL_SOME        0x10000000 /* only fail N times */
+#define OBD_FAIL_RAND        0x08000000 /* fail 1/N of the times */
+#define OBD_FAIL_USR1        0x04000000 /* user flag */
+
+#define OBD_FAIL_PRECHECK(id) (obd_fail_loc &&                                \
+                              (obd_fail_loc & OBD_FAIL_MASK_LOC) ==           \
+                              ((id) & OBD_FAIL_MASK_LOC))
+
+static inline int obd_fail_check_set(__u32 id, __u32 value, int set)
+{
+        int ret = 0;
+        if (unlikely(OBD_FAIL_PRECHECK(id) &&
+            (ret = __obd_fail_check_set(id, value, set)))) {
+                CERROR("*** obd_fail_loc=%x ***\n", id);
+        }
+        return ret;
+}
+
+/* If id hit obd_fail_loc, return 1, otherwise return 0 */
+#define OBD_FAIL_CHECK(id) \
+        obd_fail_check_set(id, 0, OBD_FAIL_LOC_NOSET)
+
+/* If id hit obd_fail_loc, obd_fail_loc |= value and return 1,
+ * otherwise return 0 */
+#define OBD_FAIL_CHECK_ORSET(id, value) \
+        obd_fail_check_set(id, value, OBD_FAIL_LOC_ORSET)
+
+/* If id hit obd_fail_loc, obd_fail_loc = value and return 1,
+ * otherwise return 0 */
+#define OBD_FAIL_CHECK_RESET(id, value) \
+        obd_fail_check_set(id, value, OBD_FAIL_LOC_RESET)
+
+
+static inline int obd_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
+{
+        if (unlikely(OBD_FAIL_PRECHECK(id)))
+                return __obd_fail_timeout_set(id, value, ms, set);
+        else
+                return 0;
+}
+
+/* If id hit obd_fail_loc, sleep for seconds or milliseconds */
+#define OBD_FAIL_TIMEOUT(id, secs) \
+        obd_fail_timeout_set(id, 0, secs * 1000, OBD_FAIL_LOC_NOSET)
+
+#define OBD_FAIL_TIMEOUT_MS(id, ms) \
+        obd_fail_timeout_set(id, 0, ms, OBD_FAIL_LOC_NOSET)
+
+/* If id hit obd_fail_loc, obd_fail_loc |= value and
+ * sleep seconds or milliseconds */
+#define OBD_FAIL_TIMEOUT_ORSET(id, value, secs) \
+        obd_fail_timeout_set(id, value, secs * 1000, OBD_FAIL_LOC_ORSET)
+
+#define OBD_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \
+        obd_fail_timeout_set(id, value, ms, OBD_FAIL_LOC_ORSET)
 
 #ifdef __KERNEL__
+static inline void obd_fail_write(int id, struct super_block *sb)
+{
+        /* We set FAIL_ONCE because we never "un-fail" a device */
+        if (OBD_FAIL_CHECK_ORSET(id & ~OBD_FAIL_ONCE, OBD_FAIL_ONCE)) {
+#ifdef LIBCFS_DEBUG
+                BDEVNAME_DECLARE_STORAGE(tmp);
+                CERROR("obd_fail_loc=%x, fail write operation on %s\n",
+                       id, ll_bdevname(sb, tmp));
+#endif
+                /* TODO-CMD: fix getting jdev */
+                __lvfs_set_rdonly(lvfs_sbdev(sb), (lvfs_sbdev_type)0);
+        }
+}
+#define OBD_FAIL_WRITE(id, sb) obd_fail_write(id, sb)
+
 /* The idea here is to synchronise two threads to force a race. The
  * first thread that calls this with a matching fail_loc is put to
  * sleep. The next thread that calls with the same fail_loc wakes up
  * the first and continues. */
-#define OBD_RACE(id)                                                         \
-do {                                                                         \
-        if  (OBD_FAIL_CHECK_ONCE(id)) {                                      \
-                obd_race_state = 0;                                          \
-                CERROR("obd_race id %x sleeping\n", (id));                   \
-                OBD_SLEEP_ON(obd_race_waitq, obd_race_state != 0);           \
-                CERROR("obd_fail_race id %x awake\n", (id));                 \
-        } else if ((obd_fail_loc & OBD_FAIL_MASK_LOC) ==                     \
-                    ((id) & OBD_FAIL_MASK_LOC)) {                            \
-                CERROR("obd_fail_race id %x waking\n", (id));                \
-                obd_race_state = 1;                                          \
-                wake_up(&obd_race_waitq);                                    \
-        }                                                                    \
-} while(0)
+static inline void obd_race(__u32 id)
+{
+        if (OBD_FAIL_PRECHECK(id)) {
+                if (unlikely(__obd_fail_check_set(id, 0, OBD_FAIL_LOC_NOSET))) {
+                        obd_race_state = 0;
+                        CERROR("obd_race id %x sleeping\n", id);
+                        OBD_SLEEP_ON(obd_race_waitq, obd_race_state != 0);
+                        CERROR("obd_fail_race id %x awake\n", id);
+                } else {
+                        CERROR("obd_fail_race id %x waking\n", id);
+                        obd_race_state = 1;
+                        cfs_waitq_signal(&obd_race_waitq);
+                }
+        }
+}
+#define OBD_RACE(id) obd_race(id)
 #else
 /* sigh.  an expedient fix until OBD_RACE is fixed up */
 #define OBD_RACE(foo) do {} while(0)
@@ -264,13 +518,107 @@ do {                                                                         \
 
 #define fixme() CDEBUG(D_OTHER, "FIXME\n");
 
-extern atomic_t libcfs_kmemory;
+extern cfs_atomic_t libcfs_kmemory;
+
+#ifdef LPROCFS
+#define obd_memory_add(size)                                                  \
+        lprocfs_counter_add(obd_memory, OBD_MEMORY_STAT, (long)(size))
+#define obd_memory_sub(size)                                                  \
+        lprocfs_counter_sub(obd_memory, OBD_MEMORY_STAT, (long)(size))
+#define obd_memory_sum()                                                      \
+        lprocfs_stats_collector(obd_memory, OBD_MEMORY_STAT,                  \
+                                LPROCFS_FIELDS_FLAGS_SUM)
+#define obd_pages_add(order)                                                  \
+        lprocfs_counter_add(obd_memory, OBD_MEMORY_PAGES_STAT,                \
+                            (long)(1 << (order)))
+#define obd_pages_sub(order)                                                  \
+        lprocfs_counter_sub(obd_memory, OBD_MEMORY_PAGES_STAT,                \
+                            (long)(1 << (order)))
+#define obd_pages_sum()                                                       \
+        lprocfs_stats_collector(obd_memory, OBD_MEMORY_PAGES_STAT,            \
+                                LPROCFS_FIELDS_FLAGS_SUM)
+
+extern void obd_update_maxusage(void);
+extern __u64 obd_memory_max(void);
+extern __u64 obd_pages_max(void);
+
+#else
+
+extern __u64 obd_alloc;
+extern __u64 obd_pages;
+
+extern __u64 obd_max_alloc;
+extern __u64 obd_max_pages;
+
+static inline void obd_memory_add(long size)
+{
+        obd_alloc += size;
+        if (obd_alloc > obd_max_alloc)
+                obd_max_alloc = obd_alloc;
+}
+
+static inline void obd_memory_sub(long size)
+{
+        obd_alloc -= size;
+}
+
+static inline void obd_pages_add(int order)
+{
+        obd_pages += 1<< order;
+        if (obd_pages > obd_max_pages)
+                obd_max_pages = obd_pages;
+}
+
+static inline void obd_pages_sub(int order)
+{
+        obd_pages -= 1<< order;
+}
+
+#define obd_memory_sum() (obd_alloc)
+#define obd_pages_sum()  (obd_pages)
+
+#define obd_memory_max() (obd_max_alloc)
+#define obd_pages_max() (obd_max_pages)
+
+#endif
+
+#define OBD_DEBUG_MEMUSAGE (1)
+
+#if OBD_DEBUG_MEMUSAGE
+#define OBD_ALLOC_POST(ptr, size, name)                                 \
+                obd_memory_add(size);                                   \
+                CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p.\n",       \
+                       (int)(size), ptr)
+
+#define OBD_FREE_PRE(ptr, size, name)                                   \
+        LASSERT(ptr);                                                   \
+        obd_memory_sub(size);                                           \
+        CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p.\n",               \
+               (int)(size), ptr);                                       \
+        POISON(ptr, 0x5a, size)
+
+#else /* !OBD_DEBUG_MEMUSAGE */
+
+#define OBD_ALLOC_POST(ptr, size, name) ((void)0)
+#define OBD_FREE_PRE(ptr, size, name)   ((void)0)
+
+#endif /* !OBD_DEBUG_MEMUSAGE */
+
+#ifdef RANDOM_FAIL_ALLOC
+#define HAS_FAIL_ALLOC_FLAG OBD_FAIL_CHECK(OBD_FAIL_GENERAL_ALLOC)
+#else
+#define HAS_FAIL_ALLOC_FLAG 0
+#endif
+
+#define OBD_ALLOC_FAIL_BITS 24
+#define OBD_ALLOC_FAIL_MASK ((1 << OBD_ALLOC_FAIL_BITS) - 1)
+#define OBD_ALLOC_FAIL_MULT (OBD_ALLOC_FAIL_MASK / 100)
 
 #if defined(LUSTRE_UTILS) /* this version is for utils only */
 #define OBD_ALLOC_GFP(ptr, size, gfp_mask)                                    \
 do {                                                                          \
         (ptr) = cfs_alloc(size, (gfp_mask));                                  \
-        if ((ptr) == NULL) {                                                  \
+        if (unlikely((ptr) == NULL)) {                                        \
                 CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
                        (int)(size), __FILE__, __LINE__);                      \
         } else {                                                              \
@@ -280,21 +628,22 @@ do {                                                                          \
         }                                                                     \
 } while (0)
 #else /* this version is for the kernel and liblustre */
+#define OBD_FREE_RTN0(ptr)                                                    \
+({                                                                            \
+        cfs_free(ptr);                                                        \
+        (ptr) = NULL;                                                         \
+        0;                                                                    \
+})
 #define OBD_ALLOC_GFP(ptr, size, gfp_mask)                                    \
 do {                                                                          \
         (ptr) = cfs_alloc(size, (gfp_mask));                                  \
-        if ((ptr) == NULL) {                                                  \
-                CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
-                       (int)(size), __FILE__, __LINE__);                      \
-                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
-                       atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\
-        } else {                                                              \
+        if (likely((ptr) != NULL &&                                           \
+                   (!HAS_FAIL_ALLOC_FLAG || obd_alloc_fail_rate == 0 ||       \
+                    !obd_alloc_fail(ptr, #ptr, "km", size,                    \
+                                    __FILE__, __LINE__) ||                    \
+                    OBD_FREE_RTN0(ptr)))){                                    \
                 memset(ptr, 0, size);                                         \
-                atomic_add(size, &obd_memory);                                \
-                if (atomic_read(&obd_memory) > obd_memmax)                    \
-                        obd_memmax = atomic_read(&obd_memory);                \
-                CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \
-                       (int)(size), ptr, atomic_read(&obd_memory));           \
+                OBD_ALLOC_POST(ptr, size, "kmalloced");                       \
         }                                                                     \
 } while (0)
 #endif
@@ -314,26 +663,24 @@ do {                                                                          \
 # define OBD_VMALLOC(ptr, size)                                               \
 do {                                                                          \
         (ptr) = cfs_alloc_large(size);                                        \
-        if ((ptr) == NULL) {                                                  \
-                CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
-                       (int)(size), __FILE__, __LINE__);                      \
-                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
-                       atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\
+        if (unlikely((ptr) == NULL)) {                                        \
+                CERROR("vmalloc of '" #ptr "' (%d bytes) failed\n",           \
+                       (int)(size));                                          \
+                CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \
+                       obd_memory_sum(), cfs_atomic_read(&libcfs_kmemory));   \
         } else {                                                              \
                 memset(ptr, 0, size);                                         \
-                atomic_add(size, &obd_memory);                                \
-                if (atomic_read(&obd_memory) > obd_memmax)                    \
-                        obd_memmax = atomic_read(&obd_memory);                \
-                CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \
-                       (int)(size), ptr, atomic_read(&obd_memory));           \
+                OBD_ALLOC_POST(ptr, size, "vmalloced");                       \
         }                                                                     \
-} while (0)
+} while(0)
 #endif
 
 #ifdef CONFIG_DEBUG_SLAB
 #define POISON(ptr, c, s) do {} while (0)
+#define POISON_PTR(ptr)  ((void)0)
 #else
 #define POISON(ptr, c, s) memset(ptr, c, s)
+#define POISON_PTR(ptr)  (ptr) = (void *)0xdeadbeef
 #endif
 
 #ifdef POISON_BULK
@@ -346,78 +693,132 @@ do {                                                                          \
 #ifdef __KERNEL__
 #define OBD_FREE(ptr, size)                                                   \
 do {                                                                          \
-        LASSERT(ptr);                                                         \
-        atomic_sub(size, &obd_memory);                                        \
-        CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n",           \
-               (int)(size), ptr, atomic_read(&obd_memory));                   \
-        POISON(ptr, 0x5a, size);                                              \
+        OBD_FREE_PRE(ptr, size, "kfreed");                                    \
         cfs_free(ptr);                                                        \
-        (ptr) = (void *)0xdeadbeef;                                           \
-} while (0)
+        POISON_PTR(ptr);                                                      \
+} while(0)
+
+
+#ifdef HAVE_RCU
+# ifdef HAVE_CALL_RCU_PARAM
+#  define my_call_rcu(rcu, cb)            call_rcu(rcu, cb, rcu)
+# else
+#  define my_call_rcu(rcu, cb)            call_rcu(rcu, cb)
+# endif
 #else
-#define OBD_FREE(ptr, size) ((void)(size), free((ptr)))
+# define my_call_rcu(rcu, cb)             (cb)(rcu)
 #endif
 
+#define OBD_FREE_RCU_CB(ptr, size, handle, free_cb)                           \
+do {                                                                          \
+        struct portals_handle *__h = (handle);                                \
+        LASSERT(handle);                                                      \
+        __h->h_ptr = (ptr);                                                   \
+        __h->h_size = (size);                                                 \
+        __h->h_free_cb = (void (*)(void *, size_t))(free_cb);                 \
+        my_call_rcu(&__h->h_rcu, class_handle_free_cb);                       \
+        POISON_PTR(ptr);                                                      \
+} while(0)
+#define OBD_FREE_RCU(ptr, size, handle) OBD_FREE_RCU_CB(ptr, size, handle, NULL)
+
+#else
+#define OBD_FREE(ptr, size) ((void)(size), free((ptr)))
+#define OBD_FREE_RCU(ptr, size, handle) (OBD_FREE(ptr, size))
+#define OBD_FREE_RCU_CB(ptr, size, handle, cb)     ((*(cb))(ptr, size))
+#endif /* ifdef __KERNEL__ */
+
 #ifdef __arch_um__
 # define OBD_VFREE(ptr, size) OBD_FREE(ptr, size)
 #else
 # define OBD_VFREE(ptr, size)                                                 \
 do {                                                                          \
-        LASSERT(ptr);                                                         \
-        atomic_sub(size, &obd_memory);                                        \
-        CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n",           \
-               (int)(size), ptr, atomic_read(&obd_memory));                   \
-        POISON(ptr, 0x5a, size);                                              \
+        OBD_FREE_PRE(ptr, size, "vfreed");                                    \
         cfs_free_large(ptr);                                                  \
-        (ptr) = (void *)0xdeadbeef;                                           \
-} while (0)
+        POISON_PTR(ptr);                                                      \
+} while(0)
 #endif
 
 /* we memset() the slab object to 0 when allocation succeeds, so DO NOT
  * HAVE A CTOR THAT DOES ANYTHING.  its work will be cleared here.  we'd
  * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */
+#define OBD_SLAB_FREE_RTN0(ptr, slab)                                         \
+({                                                                            \
+        cfs_mem_cache_free((slab), (ptr));                                    \
+        (ptr) = NULL;                                                         \
+        0;                                                                    \
+})
 #define OBD_SLAB_ALLOC(ptr, slab, type, size)                                 \
 do {                                                                          \
-        LASSERT(!in_interrupt());                                             \
+        LASSERT(ergo(type != CFS_ALLOC_ATOMIC, !cfs_in_interrupt()));         \
         (ptr) = cfs_mem_cache_alloc(slab, (type));                            \
-        if ((ptr) == NULL) {                                                  \
-                CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \
-                       (int)(size), __FILE__, __LINE__);                      \
-                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
-                       atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\
-        } else {                                                              \
+        if (likely((ptr) != NULL &&                                           \
+                   (!HAS_FAIL_ALLOC_FLAG || obd_alloc_fail_rate == 0 ||       \
+                    !obd_alloc_fail(ptr, #ptr, "slab-", size,                 \
+                                    __FILE__, __LINE__) ||                    \
+                    OBD_SLAB_FREE_RTN0(ptr, slab)))) {                        \
                 memset(ptr, 0, size);                                         \
-                atomic_add(size, &obd_memory);                                \
-                if (atomic_read(&obd_memory) > obd_memmax)                    \
-                        obd_memmax = atomic_read(&obd_memory);                \
-                CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n",\
-                       (int)(size), ptr, atomic_read(&obd_memory));           \
+                OBD_ALLOC_POST(ptr, size, "slab-alloced");                    \
         }                                                                     \
-} while (0)
+} while(0)
 
 #define OBD_FREE_PTR(ptr) OBD_FREE(ptr, sizeof *(ptr))
 
 #define OBD_SLAB_FREE(ptr, slab, size)                                        \
 do {                                                                          \
-        LASSERT(ptr);                                                         \
-        CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n",       \
-               (int)(size), ptr, atomic_read(&obd_memory));                   \
-        atomic_sub(size, &obd_memory);                                        \
-        POISON(ptr, 0x5a, size);                                              \
+        OBD_FREE_PRE(ptr, size, "slab-freed");                                \
         cfs_mem_cache_free(slab, ptr);                                        \
-        (ptr) = (void *)0xdeadbeef;                                           \
+        POISON_PTR(ptr);                                                      \
+} while(0)
+
+#define OBD_SLAB_ALLOC_PTR(ptr, slab)                                         \
+        OBD_SLAB_ALLOC((ptr), (slab), CFS_ALLOC_STD, sizeof *(ptr))
+#define OBD_SLAB_FREE_PTR(ptr, slab)                                          \
+        OBD_SLAB_FREE((ptr), (slab), sizeof *(ptr))
+#define OBD_SLAB_ALLOC_PTR_GFP(ptr, slab, gfp)                              \
+        OBD_SLAB_ALLOC((ptr), (slab), (gfp), sizeof *(ptr))
+
+#define KEY_IS(str) \
+        (keylen >= (sizeof(str)-1) && memcmp(key, str, (sizeof(str)-1)) == 0)
+
+/* Wrapper for contiguous page frame allocation */
+#define OBD_PAGES_ALLOC(ptr, order, gfp_mask)                                 \
+do {                                                                          \
+        (ptr) = cfs_alloc_pages(gfp_mask, order);                             \
+        if (unlikely((ptr) == NULL)) {                                        \
+                CERROR("alloc_pages of '" #ptr "' %d page(s) / "LPU64" bytes "\
+                       "failed\n", (int)(1 << (order)),                       \
+                       (__u64)((1 << (order)) << CFS_PAGE_SHIFT));            \
+                CERROR(LPU64" total bytes and "LPU64" total pages "           \
+                       "("LPU64" bytes) allocated by Lustre, "                \
+                       "%d total bytes by LNET\n",                            \
+                       obd_memory_sum(),                                      \
+                       obd_pages_sum() << CFS_PAGE_SHIFT,                     \
+                       obd_pages_sum(),                                       \
+                       cfs_atomic_read(&libcfs_kmemory));                     \
+        } else {                                                              \
+                obd_pages_add(order);                                         \
+                CDEBUG(D_MALLOC, "alloc_pages '" #ptr "': %d page(s) / "      \
+                       LPU64" bytes at %p.\n",                                \
+                       (int)(1 << (order)),                                   \
+                       (__u64)((1 << (order)) << CFS_PAGE_SHIFT), ptr);       \
+        }                                                                     \
 } while (0)
 
-#define KEY_IS(str) (keylen >= strlen(key) && strcmp(key, str) == 0)
+#define OBD_PAGE_ALLOC(ptr, gfp_mask)                                         \
+        OBD_PAGES_ALLOC(ptr, 0, gfp_mask)
 
-#if defined(__linux__)
-#include <linux/obd_support.h>
-#elif defined(__APPLE__)
-#include <darwin/obd_support.h>
-#elif defined(__WINNT__)
-#include <winnt/obd_support.h>
-#else
-#error Unsupported operating system.
-#endif
+#define OBD_PAGES_FREE(ptr, order)                                            \
+do {                                                                          \
+        LASSERT(ptr);                                                         \
+        obd_pages_sub(order);                                                 \
+        CDEBUG(D_MALLOC, "free_pages '" #ptr "': %d page(s) / "LPU64" bytes " \
+               "at %p.\n",                                                    \
+               (int)(1 << (order)), (__u64)((1 << (order)) << CFS_PAGE_SHIFT),\
+               ptr);                                                          \
+        __cfs_free_pages(ptr, order);                                         \
+        (ptr) = (void *)0xdeadbeef;                                           \
+} while (0)
+
+#define OBD_PAGE_FREE(ptr) OBD_PAGES_FREE(ptr, 0)
 
 #endif