From ceca3d533a8d6a19204bcfe73791709d1c936075 Mon Sep 17 00:00:00 2001 From: nathan Date: Thu, 5 Jul 2007 23:42:33 +0000 Subject: [PATCH] b=10676 i=adilger i=wangdi b=12461 minor cleanup i=brian i=adilger --- /* Review notes -- commentary only, placed in the free-text area after the --- separator; nothing in the diff below is altered. Summary: adds the obd_fail_val tunable (extern in obd_support.h, defined and exported in class_obd.c, listed as fail_val in obd_table) and moves the fail-check logic into the inline helper obd_fail_check(), which adds the OBD_FAIL_SKIP, OBD_FAIL_SOME and OBD_FAIL_RAND modes. OBD_FAIL_CHECK_ONCE becomes an alias for OBD_FAIL_CHECK. OBD_FAIL_TIMEOUT changes from a do/while statement to a statement expression that evaluates to the check result, OBD_FAIL_TIMEOUT_MS is new, and filter_intent_policy() now uses OBD_FAIL_TIMEOUT directly as its condition. In test-framework.sh, unload_modules() returns 0 rather than 254 after reporting leaks when IGNORE_LEAK is non-empty, and error() logs one FAIL line and dumps a single lctl log. NOTE(review): obd_fail_check() keeps its counter in a function-local static inside a static inline function defined in a header, so each file that includes it presumably gets a separate counter -- confirm this is intended. NOTE(review): that counter is an int but is compared with the unsigned obd_fail_val. NOTE(review): the SKIP and SOME branches share the one counter. NOTE(review): OBD_FAIL_VAL is inserted right after OBD_FAIL_LOC = 1 in the sysctl enum, which appears to shift the implicit values of the enumerators that follow -- verify nothing relies on the old numbers. */ lustre/include/obd_support.h | 105 ++++++++++++++++++++++++++++------- lustre/obdclass/class_obd.c | 2 + lustre/obdclass/linux/linux-sysctl.c | 3 + lustre/obdfilter/filter.c | 3 +- lustre/tests/test-framework.sh | 10 ++-- 5 files changed, 96 insertions(+), 27 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index debbf72..e548a1d 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -29,6 +29,7 @@ extern atomic_t obd_memory; extern int obd_memmax; extern unsigned int obd_fail_loc; +extern unsigned int obd_fail_val; extern unsigned int obd_debug_peer_on_timeout; extern unsigned int obd_dump_on_timeout; extern unsigned int obd_dump_on_eviction; @@ -203,39 +204,86 @@ extern int obd_race_state; #define OBD_FAIL_LPROC_REMOVE 0xB00 -/* preparation for a more advanced failure testbed (not functional yet) */ +/* Failure injection control */ #define OBD_FAIL_MASK_SYS 0x0000FF00 -#define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS) +#define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS) #define OBD_FAIL_ONCE 0x80000000 #define OBD_FAILED 0x40000000 - -#define OBD_FAIL_CHECK(id) (((obd_fail_loc & OBD_FAIL_MASK_LOC) == \ - ((id) & OBD_FAIL_MASK_LOC)) && \ - ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!= \ - (OBD_FAILED | OBD_FAIL_ONCE))) - -#define OBD_FAIL_CHECK_ONCE(id) \ -({ int _ret_ = 0; \ - if (OBD_FAIL_CHECK(id)) { \ +/* The following flags aren't made to be combined */ +#define OBD_FAIL_SKIP 0x20000000 /* skip N then fail */ +#define OBD_FAIL_SOME 0x10000000 /* fail N times */ +#define OBD_FAIL_RAND 0x08000000 /* fail 1/N of the time */ +#define OBD_FAIL_USR1 0x04000000 /* user flag */ + +static inline int obd_fail_check(__u32 id) +{ + static int count = 0; + if (likely((obd_fail_loc &
OBD_FAIL_MASK_LOC) != + (id & OBD_FAIL_MASK_LOC))) + return 0; + + if ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE)) == + (OBD_FAILED | OBD_FAIL_ONCE)) { + count = 0; /* paranoia */ + return 0; + } + + if (obd_fail_loc & OBD_FAIL_RAND) { + unsigned int ll_rand(void); + if (obd_fail_val < 2) + return 0; + if (ll_rand() % obd_fail_val > 0) + return 0; + } + + if (obd_fail_loc & OBD_FAIL_SKIP) { + count++; + if (count < obd_fail_val) + return 0; + count = 0; + } + + /* Overridden by FAIL_ONCE */ + if (obd_fail_loc & OBD_FAIL_SOME) { + count++; + if (count >= obd_fail_val) { + count = 0; + /* Don't fail anymore */ + obd_fail_loc |= OBD_FAIL_ONCE; + } + } + + obd_fail_loc |= OBD_FAILED; + /* Handle old checks that OR in this */ + if (id & OBD_FAIL_ONCE) + obd_fail_loc |= OBD_FAIL_ONCE; + + return 1; +} + +#define OBD_FAIL_CHECK(id) \ +({ \ + int _ret_ = 0; \ + if (unlikely(obd_fail_loc && (_ret_ = obd_fail_check(id)))) { \ CERROR("*** obd_fail_loc=%x ***\n", id); \ - obd_fail_loc |= OBD_FAILED; \ - if ((id) & OBD_FAIL_ONCE) \ - obd_fail_loc |= OBD_FAIL_ONCE; \ - _ret_ = 1; \ } \ _ret_; \ }) +/* deprecated - just use OBD_FAIL_CHECK */ +#define OBD_FAIL_CHECK_ONCE OBD_FAIL_CHECK + #define OBD_FAIL_RETURN(id, ret) \ do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ + if (unlikely(obd_fail_loc && obd_fail_check(id))) { \ + CERROR("*** obd_fail_return=%x rc=%d ***\n", id, ret); \ RETURN(ret); \ } \ } while(0) #define OBD_FAIL_TIMEOUT(id, secs) \ -do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ +({ int _ret_ = 0; \ + if (unlikely(obd_fail_loc && (_ret_ = obd_fail_check(id)))) { \ CERROR("obd_fail_timeout id %x sleeping for %d secs\n", \ (id), (secs)); \ set_current_state(TASK_UNINTERRUPTIBLE); \ @@ -243,8 +291,23 @@ do { \ cfs_time_seconds(secs)); \ set_current_state(TASK_RUNNING); \ CERROR("obd_fail_timeout id %x awake\n", (id)); \ - } \ -} while(0) + } \ + _ret_; \ +}) + +#define OBD_FAIL_TIMEOUT_MS(id, ms) \ +({ int _ret_ = 0; \ + if (unlikely(obd_fail_loc && (_ret_ =
obd_fail_check(id)))) { \ + CERROR("obd_fail_timeout id %x sleeping for %d ms\n", \ + (id), (ms)); \ + set_current_state(TASK_UNINTERRUPTIBLE); \ + cfs_schedule_timeout(CFS_TASK_UNINT, \ + cfs_time_seconds(ms)/1000); \ + set_current_state(TASK_RUNNING); \ + CERROR("obd_fail_timeout id %x awake\n", (id)); \ + } \ + _ret_; \ +}) #ifdef __KERNEL__ /* The idea here is to synchronise two threads to force a race. The @@ -253,7 +316,7 @@ do { \ * the first and continues. */ #define OBD_RACE(id) \ do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ + if (unlikely(obd_fail_loc && obd_fail_check(id))) { \ obd_race_state = 0; \ CERROR("obd_race id %x sleeping\n", (id)); \ OBD_SLEEP_ON(obd_race_waitq, obd_race_state != 0); \ diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 2959d78..9e6144e 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -58,6 +58,7 @@ int obd_memmax; /* The following are visible and mutable through /proc/sys/lustre/. */ unsigned int obd_fail_loc; +unsigned int obd_fail_val; unsigned int obd_debug_peer_on_timeout; unsigned int obd_dump_on_timeout; unsigned int obd_dump_on_eviction; @@ -375,6 +376,7 @@ void *obd_psdev = NULL; EXPORT_SYMBOL(obd_devs); EXPORT_SYMBOL(obd_fail_loc); +EXPORT_SYMBOL(obd_fail_val); EXPORT_SYMBOL(obd_print_fail_loc); EXPORT_SYMBOL(obd_race_waitq); EXPORT_SYMBOL(obd_race_state); diff --git a/lustre/obdclass/linux/linux-sysctl.c b/lustre/obdclass/linux/linux-sysctl.c index fcbd13a..52253af 100644 --- a/lustre/obdclass/linux/linux-sysctl.c +++ b/lustre/obdclass/linux/linux-sysctl.c @@ -52,6 +52,7 @@ struct ctl_table_header *obd_table_header = NULL; enum { OBD_FAIL_LOC = 1, /* control test failures instrumentation */ + OBD_FAIL_VAL, /* userdata for fail loc */ OBD_TIMEOUT, /* RPC timeout before recovery/intr */ OBD_DUMP_ON_TIMEOUT, /* dump kernel debug log upon eviction */ OBD_MEMUSED, /* bytes currently OBD_ALLOCated */ @@ -85,6 +86,8 @@ int LL_PROC_PROTO(proc_set_timeout) static ctl_table
obd_table[] = { {OBD_FAIL_LOC, "fail_loc", &obd_fail_loc, sizeof(int), 0644, NULL, &proc_fail_loc}, + {OBD_FAIL_VAL, "fail_val", &obd_fail_val, sizeof(int), 0644, NULL, + &proc_dointvec}, {OBD_TIMEOUT, "timeout", &obd_timeout, sizeof(int), 0644, NULL, &proc_set_timeout}, {OBD_DEBUG_PEER_ON_TIMEOUT, "debug_peer_on_timeout", diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 2cd555c..37befda 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -1400,8 +1400,7 @@ static int filter_intent_policy(struct ldlm_namespace *ns, * list (and potentially being added to l_pending_list by an * AST) when we are going to drop this lock ASAP. */ if (lock->l_export->exp_libclient || - OBD_FAIL_CHECK(OBD_FAIL_LDLM_GLIMPSE)) { - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_GLIMPSE, 2); + OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_GLIMPSE, 2)) { ldlm_resource_unlink_lock(lock); err = ELDLM_LOCK_ABORTED; } else { diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index beb8fce..70a1a12 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -193,6 +193,7 @@ unload_modules() { echo "$LEAK_PORTALS" 1>&2 mv $TMP/debug $TMP/debug-leak.`date +%s` || true echo "Memory leaks detected" + [ -n "$IGNORE_LEAK" ] && echo "ignoring leaks" && return 0 return 254 fi echo "modules unloaded." @@ -806,11 +807,12 @@ pgcache_empty() { ################################## # Test interface error() { + local ERRLOG sysctl -w lustre.fail_loc=0 2> /dev/null || true - log "${TESTSUITE}: **** FAIL:" $@ - $LCTL dk $TMP/lustre-log-$TESTNAME.log - log "FAIL: $TESTNAME $@" - $LCTL dk $TMP/lustrefail_${TESTSUITE}_${TESTNAME}.$(date +%s) + log "${TESTSUITE} ${TESTNAME}: **** FAIL:" $@ + ERRLOG=$TMP/lustre_${TESTSUITE}_${TESTNAME}.$(date +%s) + echo "Dumping lctl log to $ERRLOG" + $LCTL dk $ERRLOG exit 1 } -- 1.8.3.1