Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / include / obd_support.h
index befcde8..fd4d87f 100644 (file)
 #ifndef _OBD_SUPPORT
 #define _OBD_SUPPORT
 
-#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
 #include <lvfs.h>
 #include <lprocfs_status.h>
 
+#if defined(__linux__)
+#include <linux/obd_support.h>
+#elif defined(__APPLE__)
+#include <darwin/obd_support.h>
+#elif defined(__WINNT__)
+#include <winnt/obd_support.h>
+#else
+#error Unsupported operating system.
+#endif
+
 /* global variables */
 extern struct lprocfs_stats *obd_memory;
 enum {
@@ -35,16 +45,21 @@ enum {
         OBD_STATS_NUM,
 };
 
-extern unsigned int obd_fail_loc;
+/* Modes passed as the "set" argument of obd_fail_check_set() and
+ * obd_fail_timeout_set(); see the OBD_FAIL_CHECK* / OBD_FAIL_TIMEOUT* macros. */
+enum {
+        OBD_FAIL_LOC_NOSET      = 0,    /* no value applied (OBD_FAIL_CHECK) */
+        OBD_FAIL_LOC_ORSET      = 1,    /* on a hit: obd_fail_loc |= value */
+        OBD_FAIL_LOC_RESET      = 2     /* on a hit: obd_fail_loc = value */
+};
+
+extern unsigned long obd_fail_loc;
 extern unsigned int obd_fail_val;
 extern unsigned int obd_debug_peer_on_timeout;
 extern unsigned int obd_dump_on_timeout;
 extern unsigned int obd_dump_on_eviction;
+/* obd_timeout should only be used for recovery, not for
+   networking / disk / timings affected by load (use Adaptive Timeouts) */
 extern unsigned int obd_timeout;          /* seconds */
-#define PING_INTERVAL max(obd_timeout / 4, 1U)
-#define RECONNECT_INTERVAL max(obd_timeout / 10, 10U)
-extern unsigned int ldlm_timeout;
-extern unsigned int obd_health_check_timeout;
+extern unsigned int ldlm_timeout;         /* seconds */
 extern unsigned int obd_sync_filter;
 extern unsigned int obd_max_dirty_pages;
 extern atomic_t obd_dirty_pages;
@@ -52,12 +67,48 @@ extern cfs_waitq_t obd_race_waitq;
 extern int obd_race_state;
 extern unsigned int obd_alloc_fail_rate;
 
+int __obd_fail_check_set(__u32 id, __u32 value, int set);
+int __obd_fail_timeout_set(__u32 id, __u32 value, int ms, int set);
+
+/* lvfs.c */
+int obd_alloc_fail(const void *ptr, const char *name, const char *type,
+                   size_t size, const char *file, int line);
+
 /* Timeout definitions */
-#define LDLM_TIMEOUT_DEFAULT 20
 #define OBD_TIMEOUT_DEFAULT 100
-#define HEALTH_CHECK_COEF 3 / 2
-#define HEALTH_CHECK_TIMEOUT_DEFAULT (OBD_TIMEOUT_DEFAULT * HEALTH_CHECK_COEF)
-#define HEALTH_CHECK_TIMEOUT (obd_timeout * HEALTH_CHECK_COEF)
+#define LDLM_TIMEOUT_DEFAULT 20
+/* Time to wait for all clients to reconnect during recovery */
+/* Should be very conservative; must catch the first reconnect after reboot */
+#define OBD_RECOVERY_FACTOR (3) /* times obd_timeout */
+/* Change recovery-small 26b time if you change this */
+#define PING_INTERVAL max(obd_timeout / 4, 1U)
+/* Client may skip 1 ping; we must wait at least 2.5. But for multiple
+ * failover targets the client only pings one server at a time, and pings
+ * can be lost on a loaded network. Since eviction has serious consequences,
+ * and there's no urgent need to evict a client just because it's idle, we
+ * should be very conservative here. */
+#define PING_EVICT_TIMEOUT (PING_INTERVAL * 6)
+#define DISK_TIMEOUT 50          /* Beyond this we warn about disk speed */
+#define CONNECTION_SWITCH_MIN 5U /* Connection switching rate limiter */
+ /* Max connect interval for nonresponsive servers; ~50s to avoid building up
+    connect requests in the LND queues, but within obd_timeout so we don't
+    miss the recovery window */
+#define CONNECTION_SWITCH_MAX min(50U, max(CONNECTION_SWITCH_MIN,obd_timeout))
+#define CONNECTION_SWITCH_INC 5  /* Connection timeout backoff */
+#ifndef CRAY_XT3
+/* In general this should be low to have quick detection of a system
+   running on a backup server. (If it's too low, import_select_connection
+   will increase the timeout anyhow.)  */
+#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/20)
+#else
+/* ...but for very large systems (e.g. CRAY) we need to keep the initial
+   connect timeout high (bz 10803), because they will nearly ALWAYS be doing
+   the connects for the first time (clients "reboot" after every process, so
+   there is no chance to generate adaptive timeout data). */
+#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/2)
+#endif
+#define LONG_UNLINK 300          /* Unlink should happen before now */
+
 
 #define OBD_FAIL_MDS                     0x100
 #define OBD_FAIL_MDS_HANDLE_UNPACK       0x101
@@ -122,6 +173,8 @@ extern unsigned int obd_alloc_fail_rate;
 #define OBD_FAIL_MDS_LLOG_CREATE_FAILED  0x13c
 #define OBD_FAIL_MDS_OSC_PRECREATE       0x13d
 #define OBD_FAIL_MDS_LOV_SYNC_RACE       0x13e
+#define OBD_FAIL_MDS_CLOSE_NET_REP       0x13f
+#define OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT   0x140
 
 #define OBD_FAIL_OST                     0x200
 #define OBD_FAIL_OST_CONNECT_NET         0x201
@@ -141,7 +194,7 @@ extern unsigned int obd_alloc_fail_rate;
 #define OBD_FAIL_OST_BRW_READ_BULK       0x20f
 #define OBD_FAIL_OST_SYNC_NET            0x210
 #define OBD_FAIL_OST_ALL_REPLY_NET       0x211
-#define OBD_FAIL_OST_ALL_REQUESTS_NET    0x212
+#define OBD_FAIL_OST_ALL_REQUEST_NET     0x212
 #define OBD_FAIL_OST_LDLM_REPLY_NET      0x213
 #define OBD_FAIL_OST_BRW_PAUSE_BULK      0x214
 #define OBD_FAIL_OST_ENOSPC              0x215
@@ -156,6 +209,11 @@ extern unsigned int obd_alloc_fail_rate;
 #define OBD_FAIL_OST_SETATTR_CREDITS     0x21e
 #define OBD_FAIL_OST_HOLD_WRITE_RPC      0x21f
 #define OBD_FAIL_OST_BRW_WRITE_BULK2     0x220
+#define OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221
+#define OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222
+#define OBD_FAIL_OST_PAUSE_CREATE        0x223
+#define OBD_FAIL_OST_BRW_PAUSE_PACK      0x224
+#define OBD_FAIL_OST_CONNECT_NET2        0x225
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
@@ -175,6 +233,9 @@ extern unsigned int obd_alloc_fail_rate;
 #define OBD_FAIL_LDLM_GLIMPSE            0x30f
 #define OBD_FAIL_LDLM_CANCEL_RACE        0x310
 #define OBD_FAIL_LDLM_CANCEL_EVICT_RACE  0x311
+#define OBD_FAIL_LDLM_PAUSE_CANCEL       0x312
+#define OBD_FAIL_LDLM_CLOSE_THREAD       0x313
+#define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE  0x314
 
 #define OBD_FAIL_OSC                     0x400
 #define OBD_FAIL_OSC_BRW_READ_BULK       0x401
@@ -187,6 +248,9 @@ extern unsigned int obd_alloc_fail_rate;
 #define OBD_FAIL_OSC_CHECKSUM_RECEIVE    0x408
 #define OBD_FAIL_OSC_CHECKSUM_SEND       0x409
 #define OBD_FAIL_OSC_BRW_PREP_REQ2       0x40a
+#define OBD_FAIL_OSC_CONNECT_CKSUM       0x40b
+#define OBD_FAIL_OSC_CKSUM_ADLER_ONLY    0x40c
+#define OBD_FAIL_OSC_DIO_PAUSE           0x40d
 
 #define OBD_FAIL_PTLRPC                  0x500
 #define OBD_FAIL_PTLRPC_ACK              0x501
@@ -196,6 +260,9 @@ extern unsigned int obd_alloc_fail_rate;
 #define OBD_FAIL_PTLRPC_DROP_RPC         0x505
 #define OBD_FAIL_PTLRPC_DELAY_SEND       0x506
 #define OBD_FAIL_PTLRPC_DELAY_RECOV      0x507
+#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB   0x508
+#define OBD_FAIL_PTLRPC_PAUSE_REQ        0x50a
+#define OBD_FAIL_PTLRPC_PAUSE_REP        0x50c
 
 #define OBD_FAIL_OBD_PING_NET            0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
@@ -209,22 +276,26 @@ extern unsigned int obd_alloc_fail_rate;
 #define OBD_FAIL_TGT_DELAY_CONNECT       0x703
 #define OBD_FAIL_TGT_DELAY_RECONNECT     0x704
 #define OBD_FAIL_TGT_DELAY_PRECREATE     0x705
+#define OBD_FAIL_TGT_TOOMANY_THREADS     0x706
+#define OBD_FAIL_TGT_REPLAY_DROP         0x707
 
 #define OBD_FAIL_MDC_REVALIDATE_PAUSE    0x800
 #define OBD_FAIL_MDC_ENQUEUE_PAUSE       0x801
+#define OBD_FAIL_MDC_OLD_EXT_FLAGS       0x802
+#define OBD_FAIL_MDC_GETATTR_ENQUEUE     0x803
 
 #define OBD_FAIL_MGS                     0x900
 #define OBD_FAIL_MGS_ALL_REQUEST_NET     0x901
 #define OBD_FAIL_MGS_ALL_REPLY_NET       0x902
-#define OBD_FAIL_MGC_PROCESS_LOG         0x903
-#define OBD_FAIL_MGS_SLOW_REQUEST_NET    0x904
-#define OBD_FAIL_MGS_SLOW_TARGET_REG     0x905
+#define OBD_FAIL_MGC_PAUSE_PROCESS_LOG   0x903
+#define OBD_FAIL_MGS_PAUSE_REQ           0x904
+#define OBD_FAIL_MGS_PAUSE_TARGET_REG    0x905
 
-#define OBD_FAIL_QUOTA_QD_COUNT_32BIT    0xa00
+#define OBD_FAIL_QUOTA_QD_COUNT_32BIT    0xA00
 
-#define OBD_FAIL_LPROC_REMOVE            0xb00
+#define OBD_FAIL_LPROC_REMOVE            0xB00
 
-#define OBD_FAIL_GENERAL_ALLOC           0xc00
+#define OBD_FAIL_GENERAL_ALLOC           0xC00
 
 #define OBD_FAIL_SEQ                     0x1000
 #define OBD_FAIL_SEQ_QUERY_NET           0x1001
@@ -236,132 +307,114 @@ extern unsigned int obd_alloc_fail_rate;
 #define OBD_FAIL_SEC_CTX_INIT_NET        0x1201
 #define OBD_FAIL_SEC_CTX_INIT_CONT_NET   0x1202
 #define OBD_FAIL_SEC_CTX_FINI_NET        0x1203
+#define OBD_FAIL_SEC_CTX_HDL_PAUSE       0x1204
 
 /* Failure injection control */
 #define OBD_FAIL_MASK_SYS    0x0000FF00
 #define OBD_FAIL_MASK_LOC   (0x000000FF | OBD_FAIL_MASK_SYS)
-#define OBD_FAIL_ONCE        0x80000000
-#define OBD_FAILED           0x40000000
+
+#define OBD_FAILED_BIT       30
+/* OBD_FAILED is 0x40000000 */
+#define OBD_FAILED          (1 << OBD_FAILED_BIT)
+
+#define OBD_FAIL_ONCE_BIT    31
+/* OBD_FAIL_ONCE is 0x80000000.  The constant must be unsigned: (1 << 31)
+ * overflows a 32-bit int, and the resulting negative value would sign-extend
+ * when combined with the unsigned long obd_fail_loc. */
+#define OBD_FAIL_ONCE       (1U << OBD_FAIL_ONCE_BIT)
 
 /* The following flags aren't made to be combined */
-#define OBD_FAIL_SKIP        0x20000000 /* skip N then fail */
-#define OBD_FAIL_SOME        0x10000000 /* fail N times */
-#define OBD_FAIL_RAND        0x08000000 /* fail 1/N of the time */
+#define OBD_FAIL_SKIP        0x20000000 /* skip N times then fail */
+#define OBD_FAIL_SOME        0x10000000 /* only fail N times */
+#define OBD_FAIL_RAND        0x08000000 /* fail 1/N of the times */
 #define OBD_FAIL_USR1        0x04000000 /* user flag */
 
-static inline int obd_fail_check(__u32 id)
+/* Cheap inline test: true when obd_fail_loc is non-zero and its location
+ * bits (OBD_FAIL_MASK_LOC) equal those of id.  Bits outside the mask are
+ * ignored on both sides of the comparison. */
+#define OBD_FAIL_PRECHECK(id) (obd_fail_loc &&                                \
+                              (obd_fail_loc & OBD_FAIL_MASK_LOC) ==           \
+                              ((id) & OBD_FAIL_MASK_LOC))
+
+/* Inline front end for fail-location checks.  Only when OBD_FAIL_PRECHECK(id)
+ * passes is __obd_fail_check_set() called with id, value and set (one of
+ * OBD_FAIL_LOC_*); a non-zero result from it is logged with CERROR and
+ * returned.  Returns 0 when the precheck fails or the callee returns 0. */
+static inline int obd_fail_check_set(__u32 id, __u32 value, int set)
 {
-        static int count = 0;
-        if (likely((obd_fail_loc & OBD_FAIL_MASK_LOC) != 
-                   (id & OBD_FAIL_MASK_LOC)))
-                return 0;
-        
-        if ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE)) ==
-            (OBD_FAILED | OBD_FAIL_ONCE)) {
-                count = 0; /* paranoia */
-                return 0;
+        int ret = 0;
+        if (unlikely(OBD_FAIL_PRECHECK(id) &&
+            (ret = __obd_fail_check_set(id, value, set)))) {
+                CERROR("*** obd_fail_loc=%x ***\n", id);
         }
+        return ret;
+}
 
-        if (obd_fail_loc & OBD_FAIL_RAND) {
-                unsigned int ll_rand(void);
-                if (obd_fail_val < 2)
-                        return 0;
-                if (ll_rand() % obd_fail_val > 0)
-                        return 0;
-        }
+/* If id hits obd_fail_loc, return 1; otherwise return 0 */
+#define OBD_FAIL_CHECK(id) \
+        obd_fail_check_set(id, 0, OBD_FAIL_LOC_NOSET)
 
-        if (obd_fail_loc & OBD_FAIL_SKIP) {
-                count++;
-                if (count < obd_fail_val) 
-                        return 0;
-                count = 0;
-        }
+/* If id hits obd_fail_loc, obd_fail_loc |= value and return 1;
+ * otherwise return 0 */
+#define OBD_FAIL_CHECK_ORSET(id, value) \
+        obd_fail_check_set(id, value, OBD_FAIL_LOC_ORSET)
 
-        /* Overridden by FAIL_ONCE */
-        if (obd_fail_loc & OBD_FAIL_SOME) {
-                count++;
-                if (count >= obd_fail_val) {
-                        count = 0;
-                        /* Don't fail anymore */
-                        obd_fail_loc |= OBD_FAIL_ONCE;
-                }
-        }
+/* If id hits obd_fail_loc, obd_fail_loc = value and return 1;
+ * otherwise return 0 */
+#define OBD_FAIL_CHECK_RESET(id, value) \
+        obd_fail_check_set(id, value, OBD_FAIL_LOC_RESET)
 
-        obd_fail_loc |= OBD_FAILED;
-        /* Handle old checks that OR in this */
-        if (id & OBD_FAIL_ONCE)
-                obd_fail_loc |= OBD_FAIL_ONCE;
 
-        return 1;
+static inline int obd_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
+{
+        /* Only call out of line when the cheap precheck matches id;
+         * otherwise report "no hit" (0) without doing anything. */
+        return unlikely(OBD_FAIL_PRECHECK(id)) ?
+               __obd_fail_timeout_set(id, value, ms, set) : 0;
 }
 
-#define OBD_FAIL_CHECK(id)                                                   \
-({                                                                           \
-        int _ret_ = 0;                                                       \
-        if (unlikely(obd_fail_loc && (_ret_ = obd_fail_check(id)))) {        \
-                CERROR("*** obd_fail_loc=%x ***\n", id);                     \
-        }                                                                    \
-        _ret_;                                                               \
-})
-
-/* deprecated - just use OBD_FAIL_CHECK */
-#define OBD_FAIL_CHECK_ONCE OBD_FAIL_CHECK
+/* If id hits obd_fail_loc, sleep for the given number of seconds
+ * (OBD_FAIL_TIMEOUT) or milliseconds (OBD_FAIL_TIMEOUT_MS).  secs is
+ * parenthesized so that expression arguments such as "a + b" are scaled
+ * to milliseconds as a whole. */
+#define OBD_FAIL_TIMEOUT(id, secs) \
+        obd_fail_timeout_set(id, 0, (secs) * 1000, OBD_FAIL_LOC_NOSET)
 
-#define OBD_FAIL_RETURN(id, ret)                                             \
-do {                                                                         \
-        if (unlikely(obd_fail_loc && obd_fail_check(id))) {                  \
-                CERROR("*** obd_fail_return=%x rc=%d ***\n", id, ret);       \
-                RETURN(ret);                                                 \
-        }                                                                    \
-} while(0)
+/* Millisecond variant: ms is handed to obd_fail_timeout_set() unscaled. */
+#define OBD_FAIL_TIMEOUT_MS(id, ms) \
+        obd_fail_timeout_set(id, 0, ms, OBD_FAIL_LOC_NOSET)
 
-#define OBD_FAIL_TIMEOUT(id, secs)                                           \
-({      int _ret_ = 0;                                                       \
-        if (unlikely(obd_fail_loc && (_ret_ = obd_fail_check(id)))) {        \
-                CERROR("obd_fail_timeout id %x sleeping for %d secs\n",      \
-                       (id), (secs));                                        \
-                set_current_state(TASK_UNINTERRUPTIBLE);                     \
-                cfs_schedule_timeout(CFS_TASK_UNINT,                         \
-                                    cfs_time_seconds(secs));                 \
-                set_current_state(TASK_RUNNING);                             \
-                CERROR("obd_fail_timeout id %x awake\n", (id));              \
-        }                                                                    \
-        _ret_;                                                               \
-})
+/* If id hits obd_fail_loc, obd_fail_loc |= value and sleep for the given
+ * number of seconds (OBD_FAIL_TIMEOUT_ORSET) or milliseconds
+ * (OBD_FAIL_TIMEOUT_MS_ORSET).  secs is parenthesized so that expression
+ * arguments are scaled to milliseconds as a whole. */
+#define OBD_FAIL_TIMEOUT_ORSET(id, value, secs) \
+        obd_fail_timeout_set(id, value, (secs) * 1000, OBD_FAIL_LOC_ORSET)
 
-#define OBD_FAIL_TIMEOUT_MS(id, ms)                                          \
-({      int _ret_ = 0;                                                       \
-        if (unlikely(obd_fail_loc && (_ret_ = obd_fail_check(id)))) {        \
-                CERROR("obd_fail_timeout id %x sleeping for %d ms\n",        \
-                       (id), (ms));                                          \
-                set_current_state(TASK_UNINTERRUPTIBLE);                     \
-                cfs_schedule_timeout(CFS_TASK_UNINT,                         \
-                                     cfs_time_seconds(ms)/1000);             \
-                set_current_state(TASK_RUNNING);                             \
-                CERROR("obd_fail_timeout id %x awake\n", (id));              \
-        }                                                                    \
-        _ret_;                                                               \
-})
+/* Millisecond ORSET variant: ms is handed to obd_fail_timeout_set() unscaled. */
+#define OBD_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \
+        obd_fail_timeout_set(id, value, ms, OBD_FAIL_LOC_ORSET)
 
 #ifdef __KERNEL__
+/* Fail-injection hook for writes.  id is checked with its OBD_FAIL_ONCE bit
+ * masked off; on a hit, OBD_FAIL_CHECK_ORSET is asked to OR OBD_FAIL_ONCE
+ * into obd_fail_loc, and __lvfs_set_rdonly() is called on the device of sb. */
+static inline void obd_fail_write(int id, struct super_block *sb)
+{
+        /* We set FAIL_ONCE because we never "un-fail" a device */
+        if (OBD_FAIL_CHECK_ORSET(id & ~OBD_FAIL_ONCE, OBD_FAIL_ONCE)) {
+#ifdef LIBCFS_DEBUG
+                /* The log message is only emitted in LIBCFS_DEBUG builds */
+                BDEVNAME_DECLARE_STORAGE(tmp);
+                CERROR("obd_fail_loc=%x, fail write operation on %s\n",
+                       id, ll_bdevname(sb, tmp));
+#endif
+                /* TODO-CMD: fix getting jdev */
+                /* NOTE(review): the 0 second argument presumably stands in
+                 * for the journal device mentioned above -- confirm */
+                __lvfs_set_rdonly(lvfs_sbdev(sb), (lvfs_sbdev_type)0);
+        }
+}
+#define OBD_FAIL_WRITE(id, sb) obd_fail_write(id, sb)
+
 /* The idea here is to synchronise two threads to force a race. The
  * first thread that calls this with a matching fail_loc is put to
  * sleep. The next thread that calls with the same fail_loc wakes up
  * the first and continues. */
-#define OBD_RACE(id)                                                         \
-do {                                                                         \
-        if (unlikely(obd_fail_loc && obd_fail_check(id))) {                  \
-                obd_race_state = 0;                                          \
-                CERROR("obd_race id %x sleeping\n", (id));                   \
-                OBD_SLEEP_ON(obd_race_waitq, obd_race_state != 0);           \
-                CERROR("obd_fail_race id %x awake\n", (id));                 \
-        } else if ((obd_fail_loc & OBD_FAIL_MASK_LOC) ==                     \
-                    ((id) & OBD_FAIL_MASK_LOC)) {                            \
-                CERROR("obd_fail_race id %x waking\n", (id));                \
-                obd_race_state = 1;                                          \
-                wake_up(&obd_race_waitq);                                    \
-        }                                                                    \
-} while(0)
+static inline void obd_race(__u32 id)
+{
+        /* Nothing to do unless obd_fail_loc currently targets this id */
+        if (!OBD_FAIL_PRECHECK(id))
+                return;
+
+        /* A hit on the fail check makes this caller the sleeper */
+        if (unlikely(__obd_fail_check_set(id, 0, OBD_FAIL_LOC_NOSET))) {
+                obd_race_state = 0;
+                CERROR("obd_race id %x sleeping\n", id);
+                OBD_SLEEP_ON(obd_race_waitq, obd_race_state != 0);
+                CERROR("obd_fail_race id %x awake\n", id);
+                return;
+        }
+
+        /* Location matched but the check did not fire: wake the sleeper */
+        CERROR("obd_fail_race id %x waking\n", id);
+        obd_race_state = 1;
+        wake_up(&obd_race_waitq);
+}
+#define OBD_RACE(id) obd_race(id)
 #else
 /* sigh.  an expedient fix until OBD_RACE is fixed up */
 #define OBD_RACE(foo) do {} while(0)
@@ -371,7 +424,7 @@ do {                                                                         \
 
 extern atomic_t libcfs_kmemory;
 
-#ifdef LPROCFS 
+#ifdef LPROCFS
 #define obd_memory_add(size)                                                  \
         lprocfs_counter_add(obd_memory, OBD_MEMORY_STAT, (long)(size))
 #define obd_memory_sub(size)                                                  \
@@ -413,7 +466,7 @@ static inline void obd_memory_sub(long size)
         obd_alloc -= size;
 }
 
-static inline void obd_pages_add(int order) 
+static inline void obd_pages_add(int order)
 {
         obd_pages += 1<< order;
         if (obd_pages > obd_max_pages)
@@ -710,7 +763,7 @@ do {                                                                          \
         cfs_mem_cache_free((slab), (ptr));                                    \
         (ptr) = NULL;                                                         \
         0;                                                                    \
-}) 
+})
 #define OBD_SLAB_ALLOC(ptr, slab, type, size)                                 \
 do {                                                                          \
         LASSERT(!in_interrupt());                                             \
@@ -739,7 +792,8 @@ do {                                                                          \
 #define OBD_SLAB_FREE_PTR(ptr, slab)                                          \
         OBD_SLAB_FREE((ptr), (slab), sizeof *(ptr))
 
-#define KEY_IS(str) (keylen >= strlen(str) && strncmp(key, str, keylen) == 0)
+/* True when the caller's key (of length keylen) begins with str.  Expands to
+ * references to `key` and `keylen`, which must be in scope at the use site.
+ * The length is taken as sizeof(str)-1, so str needs to be a string literal
+ * or char array; a char pointer would yield the pointer size instead. */
+#define KEY_IS(str) \
+        (keylen >= (sizeof(str)-1) && memcmp(key, str, (sizeof(str)-1)) == 0)
 
 /* Wrapper for contiguous page frame allocation */
 #define OBD_PAGES_ALLOC(ptr, order, gfp_mask)                                 \
@@ -782,14 +836,4 @@ do {                                                                          \
 
 #define OBD_PAGE_FREE(ptr) OBD_PAGES_FREE(ptr, 0)
 
-#if defined(__linux__)
-#include <linux/obd_support.h>
-#elif defined(__APPLE__)
-#include <darwin/obd_support.h>
-#elif defined(__WINNT__)
-#include <winnt/obd_support.h>
-#else
-#error Unsupported operating system.
-#endif
-
 #endif