Whamcloud - gitweb
b=10676
author nathan <nathan>
Thu, 11 Oct 2007 19:52:12 +0000 (19:52 +0000)
committer nathan <nathan>
Thu, 11 Oct 2007 19:52:12 +0000 (19:52 +0000)
i=adilger
i=wangdi
enhanced obd_fail functionality

lustre/include/obd_support.h
lustre/mdt/mdt_handler.c
lustre/obdclass/class_obd.c
lustre/obdclass/linux/linux-sysctl.c
lustre/obdfilter/filter.c
lustre/tests/test-framework.sh
lustre/utils/mount_lustre.c

index 8452374..ca70d84 100644 (file)
@@ -29,6 +29,7 @@
 extern atomic_t obd_memory;
 extern int obd_memmax;
 extern unsigned int obd_fail_loc;
 extern atomic_t obd_memory;
 extern int obd_memmax;
 extern unsigned int obd_fail_loc;
+extern unsigned int obd_fail_val;
 extern unsigned int obd_debug_peer_on_timeout;
 extern unsigned int obd_dump_on_timeout;
 extern unsigned int obd_dump_on_eviction;
 extern unsigned int obd_debug_peer_on_timeout;
 extern unsigned int obd_dump_on_timeout;
 extern unsigned int obd_dump_on_eviction;
@@ -225,39 +226,87 @@ extern int obd_race_state;
 #define OBD_FAIL_SEC_CTX_INIT_CONT_NET   0x1202
 #define OBD_FAIL_SEC_CTX_FINI_NET        0x1203
 
 #define OBD_FAIL_SEC_CTX_INIT_CONT_NET   0x1202
 #define OBD_FAIL_SEC_CTX_FINI_NET        0x1203
 
-/* preparation for a more advanced failure testbed (not functional yet) */
+/* Failure injection control */
 #define OBD_FAIL_MASK_SYS    0x0000FF00
 #define OBD_FAIL_MASK_SYS    0x0000FF00
-#define OBD_FAIL_MASK_LOC    (0x000000FF | OBD_FAIL_MASK_SYS)
+#define OBD_FAIL_MASK_LOC   (0x000000FF | OBD_FAIL_MASK_SYS)
 #define OBD_FAIL_ONCE        0x80000000
 #define OBD_FAILED           0x40000000
 
 #define OBD_FAIL_ONCE        0x80000000
 #define OBD_FAILED           0x40000000
 
-#define OBD_FAIL_CHECK(id)   (((obd_fail_loc & OBD_FAIL_MASK_LOC) ==           \
-                              ((id) & OBD_FAIL_MASK_LOC)) &&                   \
-                              ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!= \
-                                (OBD_FAILED | OBD_FAIL_ONCE)))
+/* The following flags aren't made to be combined */
+#define OBD_FAIL_SKIP        0x20000000 /* skip N then fail */
+#define OBD_FAIL_SOME        0x10000000 /* fail N times */
+#define OBD_FAIL_RAND        0x08000000 /* fail 1/N of the time */
+#define OBD_FAIL_USR1        0x04000000 /* user flag */
 
 
-#define OBD_FAIL_CHECK_ONCE(id)                                              \
-({      int _ret_ = 0;                                                       \
-        if (unlikely(OBD_FAIL_CHECK(id))) {                                  \
-                CERROR("*** obd_fail_loc=0x%x ***\n", id);                   \
-                obd_fail_loc |= OBD_FAILED;                                  \
-                if ((id) & OBD_FAIL_ONCE)                                    \
-                        obd_fail_loc |= OBD_FAIL_ONCE;                       \
-                _ret_ = 1;                                                   \
+static inline int obd_fail_check(__u32 id)
+{
+        static int count = 0;
+        if (likely((obd_fail_loc & OBD_FAIL_MASK_LOC) != 
+                   (id & OBD_FAIL_MASK_LOC)))
+                return 0;
+        
+        if ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE)) ==
+            (OBD_FAILED | OBD_FAIL_ONCE)) {
+                count = 0; /* paranoia */
+                return 0;
+        }
+
+        if (obd_fail_loc & OBD_FAIL_RAND) {
+                unsigned int ll_rand(void);
+                if (obd_fail_val < 2)
+                        return 0;
+                if (ll_rand() % obd_fail_val > 0)
+                        return 0;
+        }
+
+        if (obd_fail_loc & OBD_FAIL_SKIP) {
+                count++;
+                if (count < obd_fail_val) 
+                        return 0;
+                count = 0;
+        }
+
+        /* Overridden by FAIL_ONCE */
+        if (obd_fail_loc & OBD_FAIL_SOME) {
+                count++;
+                if (count >= obd_fail_val) {
+                        count = 0;
+                        /* Don't fail anymore */
+                        obd_fail_loc |= OBD_FAIL_ONCE;
+                }
+        }
+
+        obd_fail_loc |= OBD_FAILED;
+        /* Handle old checks that OR in this */
+        if (id & OBD_FAIL_ONCE)
+                obd_fail_loc |= OBD_FAIL_ONCE;
+
+        return 1;
+}
+
+#define OBD_FAIL_CHECK(id)                                                   \
+({                                                                           \
+        int _ret_ = 0;                                                       \
+        if (unlikely(obd_fail_loc && (_ret_ = obd_fail_check(id)))) {        \
+                CERROR("*** obd_fail_loc=%x ***\n", id);                     \
         }                                                                    \
         _ret_;                                                               \
 })
 
         }                                                                    \
         _ret_;                                                               \
 })
 
+/* deprecated - just use OBD_FAIL_CHECK */
+#define OBD_FAIL_CHECK_ONCE OBD_FAIL_CHECK
+
 #define OBD_FAIL_RETURN(id, ret)                                             \
 do {                                                                         \
 #define OBD_FAIL_RETURN(id, ret)                                             \
 do {                                                                         \
-        if (unlikely(OBD_FAIL_CHECK_ONCE(id))) {                             \
+        if (unlikely(obd_fail_loc && obd_fail_check(id))) {                  \
+                CERROR("*** obd_fail_return=%x rc=%d ***\n", id, ret);       \
                 RETURN(ret);                                                 \
         }                                                                    \
 } while(0)
 
 #define OBD_FAIL_TIMEOUT(id, secs)                                           \
                 RETURN(ret);                                                 \
         }                                                                    \
 } while(0)
 
 #define OBD_FAIL_TIMEOUT(id, secs)                                           \
-do {                                                                         \
-        if (unlikely(OBD_FAIL_CHECK_ONCE(id))) {                             \
+({      int _ret_ = 0;                                                       \
+        if (unlikely(obd_fail_loc && (_ret_ = obd_fail_check(id)))) {        \
                 CERROR("obd_fail_timeout id %x sleeping for %d secs\n",      \
                        (id), (secs));                                        \
                 set_current_state(TASK_UNINTERRUPTIBLE);                     \
                 CERROR("obd_fail_timeout id %x sleeping for %d secs\n",      \
                        (id), (secs));                                        \
                 set_current_state(TASK_UNINTERRUPTIBLE);                     \
@@ -265,8 +314,23 @@ do {                                                                         \
                                     cfs_time_seconds(secs));                 \
                 set_current_state(TASK_RUNNING);                             \
                 CERROR("obd_fail_timeout id %x awake\n", (id));              \
                                     cfs_time_seconds(secs));                 \
                 set_current_state(TASK_RUNNING);                             \
                 CERROR("obd_fail_timeout id %x awake\n", (id));              \
-       }                                                                     \
-} while(0)
+        }                                                                    \
+        _ret_;                                                               \
+})
+
+#define OBD_FAIL_TIMEOUT_MS(id, ms)                                          \
+({      int _ret_ = 0;                                                       \
+        if (unlikely(obd_fail_loc && (_ret_ = obd_fail_check(id)))) {        \
+                CERROR("obd_fail_timeout id %x sleeping for %d ms\n",        \
+                       (id), (ms));                                          \
+                set_current_state(TASK_UNINTERRUPTIBLE);                     \
+                cfs_schedule_timeout(CFS_TASK_UNINT,                         \
+                                     cfs_time_seconds(ms)/1000);             \
+                set_current_state(TASK_RUNNING);                             \
+                CERROR("obd_fail_timeout id %x awake\n", (id));              \
+        }                                                                    \
+        _ret_;                                                               \
+})
 
 #ifdef __KERNEL__
 /* The idea here is to synchronise two threads to force a race. The
 
 #ifdef __KERNEL__
 /* The idea here is to synchronise two threads to force a race. The
@@ -275,7 +339,7 @@ do {                                                                         \
  * the first and continues. */
 #define OBD_RACE(id)                                                         \
 do {                                                                         \
  * the first and continues. */
 #define OBD_RACE(id)                                                         \
 do {                                                                         \
-        if (unlikely(OBD_FAIL_CHECK_ONCE(id))) {                             \
+        if (unlikely(obd_fail_loc && obd_fail_check(id))) {                  \
                 obd_race_state = 0;                                          \
                 CERROR("obd_race id %x sleeping\n", (id));                   \
                 OBD_SLEEP_ON(obd_race_waitq, obd_race_state != 0);           \
                 obd_race_state = 0;                                          \
                 CERROR("obd_race id %x sleeping\n", (id));                   \
                 OBD_SLEEP_ON(obd_race_waitq, obd_race_state != 0);           \
index 54fd4ab..6ea17ac 100644 (file)
@@ -2024,7 +2024,7 @@ static int mdt_req_handle(struct mdt_thread_info *info,
         LASSERT(current->journal_info == NULL);
 
         /*
         LASSERT(current->journal_info == NULL);
 
         /*
-         * Do not use *_FAIL_CHECK_ONCE() macros, because they will stop
+         * Mask out OBD_FAIL_ONCE, because that will stop
          * correct handling of failed req later in ldlm due to doing
          * obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED without actually
          * correct actions like it is done in target_send_reply_msg().
          * correct handling of failed req later in ldlm due to doing
          * obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED without actually
          * correct actions like it is done in target_send_reply_msg().
@@ -2034,7 +2034,7 @@ static int mdt_req_handle(struct mdt_thread_info *info,
                  * Set to info->mti_fail_id to handler fail_id, it will be used
                  * later, and better than use default fail_id.
                  */
                  * Set to info->mti_fail_id to handler fail_id, it will be used
                  * later, and better than use default fail_id.
                  */
-                if (OBD_FAIL_CHECK(h->mh_fail_id)) {
+                if (OBD_FAIL_CHECK(h->mh_fail_id & OBD_FAIL_MASK_LOC)) {
                         info->mti_fail_id = h->mh_fail_id;
                         RETURN(0);
                 }
                         info->mti_fail_id = h->mh_fail_id;
                         RETURN(0);
                 }
index a429e15..84753fb 100644 (file)
@@ -58,6 +58,7 @@ int obd_memmax;
 
 /* The following are visible and mutable through /proc/sys/lustre/. */
 unsigned int obd_fail_loc;
 
 /* The following are visible and mutable through /proc/sys/lustre/. */
 unsigned int obd_fail_loc;
+unsigned int obd_fail_val;
 unsigned int obd_debug_peer_on_timeout;
 unsigned int obd_dump_on_timeout;
 unsigned int obd_dump_on_eviction;
 unsigned int obd_debug_peer_on_timeout;
 unsigned int obd_dump_on_timeout;
 unsigned int obd_dump_on_eviction;
@@ -376,6 +377,7 @@ void *obd_psdev = NULL;
 
 EXPORT_SYMBOL(obd_devs);
 EXPORT_SYMBOL(obd_fail_loc);
 
 EXPORT_SYMBOL(obd_devs);
 EXPORT_SYMBOL(obd_fail_loc);
+EXPORT_SYMBOL(obd_fail_val);
 EXPORT_SYMBOL(obd_print_fail_loc);
 EXPORT_SYMBOL(obd_race_waitq);
 EXPORT_SYMBOL(obd_race_state);
 EXPORT_SYMBOL(obd_print_fail_loc);
 EXPORT_SYMBOL(obd_race_waitq);
 EXPORT_SYMBOL(obd_race_state);
index 4fd277c..ba3d186 100644 (file)
@@ -49,6 +49,7 @@ cfs_sysctl_table_header_t *obd_table_header = NULL;
 
 enum {
         OBD_FAIL_LOC = 1,       /* control test failures instrumentation */
 
 enum {
         OBD_FAIL_LOC = 1,       /* control test failures instrumentation */
+        OBD_FAIL_VAL,           /* userdata for fail loc */
         OBD_TIMEOUT,            /* RPC timeout before recovery/intr */
         OBD_DUMP_ON_TIMEOUT,    /* dump kernel debug log upon eviction */
         OBD_MEMUSED,            /* bytes currently OBD_ALLOCated */
         OBD_TIMEOUT,            /* RPC timeout before recovery/intr */
         OBD_DUMP_ON_TIMEOUT,    /* dump kernel debug log upon eviction */
         OBD_MEMUSED,            /* bytes currently OBD_ALLOCated */
@@ -89,6 +90,14 @@ static cfs_sysctl_table_t obd_table[] = {
                 .proc_handler = &proc_fail_loc
         },
         {
                 .proc_handler = &proc_fail_loc
         },
         {
+                .ctl_name = OBD_FAIL_VAL,
+                .procname = "fail_val",
+                .data     = &obd_fail_val,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        },
+        {
                 .ctl_name = OBD_TIMEOUT,
                 .procname = "timeout",
                 .data     = &obd_timeout,
                 .ctl_name = OBD_TIMEOUT,
                 .procname = "timeout",
                 .data     = &obd_timeout,
index c469ef0..3a244b0 100644 (file)
@@ -1636,8 +1636,7 @@ static int filter_intent_policy(struct ldlm_namespace *ns,
                  * list (and potentially being added to l_pending_list by an
                  * AST) when we are going to drop this lock ASAP. */
                 if (lock->l_export->exp_libclient ||
                  * list (and potentially being added to l_pending_list by an
                  * AST) when we are going to drop this lock ASAP. */
                 if (lock->l_export->exp_libclient ||
-                    OBD_FAIL_CHECK(OBD_FAIL_LDLM_GLIMPSE)) {
-                        OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_GLIMPSE, 2);
+                    OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_GLIMPSE, 2)) {
                         ldlm_resource_unlink_lock(lock);
                         err = ELDLM_LOCK_ABORTED;
                 } else {
                         ldlm_resource_unlink_lock(lock);
                         err = ELDLM_LOCK_ABORTED;
                 } else {
index 481022c..7f3a1f6 100644 (file)
@@ -227,6 +227,7 @@ unload_modules() {
         echo "$LEAK_PORTALS" 1>&2
         mv $TMP/debug $TMP/debug-leak.`date +%s` || true
         echo "Memory leaks detected"
         echo "$LEAK_PORTALS" 1>&2
         mv $TMP/debug $TMP/debug-leak.`date +%s` || true
         echo "Memory leaks detected"
+       [ -n "$IGNORE_LEAK" ] && echo "ignoring leaks" && return 0
         return 254
     fi
     echo "modules unloaded."
         return 254
     fi
     echo "modules unloaded."
index 74d2910..0589431 100644 (file)
@@ -37,6 +37,7 @@
 #include "obdctl.h"
 #include <lustre_ver.h>
 #include <glob.h>
 #include "obdctl.h"
 #include <lustre_ver.h>
 #include <glob.h>
+#include <ctype.h>
 
 #define MAX_HW_SECTORS_KB_PATH  "queue/max_hw_sectors_kb"
 #define MAX_SECTORS_KB_PATH     "queue/max_sectors_kb"
 
 #define MAX_HW_SECTORS_KB_PATH  "queue/max_hw_sectors_kb"
 #define MAX_SECTORS_KB_PATH     "queue/max_sectors_kb"
@@ -186,6 +187,7 @@ static const struct opt_map opt_map[] = {
   { "nosuid",   0, MS_NOSUID },      /* don't honor suid executables */
   { "dev",      1, MS_NODEV  },      /* interpret device files  */
   { "nodev",    0, MS_NODEV  },      /* don't interpret devices */
   { "nosuid",   0, MS_NOSUID },      /* don't honor suid executables */
   { "dev",      1, MS_NODEV  },      /* interpret device files  */
   { "nodev",    0, MS_NODEV  },      /* don't interpret devices */
+  { "sync",     0, MS_SYNCHRONOUS},  /* synchronous I/O */
   { "async",    1, MS_SYNCHRONOUS},  /* asynchronous I/O */
   { "auto",     0, 0         },      /* Can be mounted using -a */
   { "noauto",   0, 0         },      /* Can only be mounted explicitly */
   { "async",    1, MS_SYNCHRONOUS},  /* asynchronous I/O */
   { "auto",     0, 0         },      /* Can be mounted using -a */
   { "noauto",   0, 0         },      /* Can only be mounted explicitly */
@@ -328,8 +330,9 @@ int set_tunables(char *source, int src_len)
 
         rc = stat(dev, &stat_buf);
         if (rc) {
 
         rc = stat(dev, &stat_buf);
         if (rc) {
-                fprintf(stderr, "warning: %s, device %s stat failed\n",
-                        strerror(errno), dev);
+                if (verbose)
+                        fprintf(stderr, "warning: %s, device %s stat failed\n",
+                                strerror(errno), dev);
                 return rc;
         }
 
                 return rc;
         }
 
@@ -337,8 +340,9 @@ int set_tunables(char *source, int src_len)
         minor = minor(stat_buf.st_rdev);
         rc = glob("/sys/block/*", GLOB_NOSORT, NULL, &glob_info);
         if (rc) {
         minor = minor(stat_buf.st_rdev);
         rc = glob("/sys/block/*", GLOB_NOSORT, NULL, &glob_info);
         if (rc) {
-                fprintf(stderr, "warning: failed to read entries under "
-                        "/sys/block\n");
+                if (verbose)
+                        fprintf(stderr, "warning: failed to read entries under "
+                                "/sys/block\n");
                 return rc;
         }
 
                 return rc;
         }