From fa52db928a5cb7503758018a2d7e887462da8eb7 Mon Sep 17 00:00:00 2001 From: yury Date: Sat, 13 Dec 2008 11:51:15 +0000 Subject: [PATCH] b=17645 r=johann,shadow - handles -EINTR while waiting for lock + race with bl_ast; - adds sanityN.sh test_34 for testing/reproducing this situation. --- lustre/include/lustre_dlm.h | 5 ++++- lustre/include/obd_support.h | 2 ++ lustre/ldlm/ldlm_lock.c | 16 ++++++++++------ lustre/ldlm/ldlm_lockd.c | 14 ++++++++++---- lustre/ldlm/ldlm_request.c | 39 +++++++++++++++++++++++++++++++++------ lustre/tests/sanityN.sh | 42 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 101 insertions(+), 17 deletions(-) diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 74a5324..8e9c18a 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -183,6 +183,9 @@ typedef enum { /* measure lock contention and return -EUSERS if locking contention is high */ #define LDLM_FL_DENY_ON_CONTENTION 0x40000000 +/* 0x80000000 is occupied by LDLM_AST_DISCARD_DATA */ +#define LDLM_FL_FAIL_LOC 0x100000000ULL + /* The blocking callback is overloaded to perform two functions. These flags * indicate which operation should be performed. */ #define LDLM_CB_BLOCKING 1 @@ -517,7 +520,7 @@ struct ldlm_lock { ldlm_policy_data_t l_policy_data; /* protected by lr_lock */ - __u32 l_flags; + __u64 l_flags; __u32 l_readers; __u32 l_writers; __u8 l_destroyed; diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index bc4aeb3..e3530cc 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -239,6 +239,8 @@ extern unsigned int obd_alloc_fail_rate; #define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314 #define OBD_FAIL_LDLM_CP_CB_WAIT 0x315 #define OBD_FAIL_LDLM_OST_FAIL_RACE 0x316 +#define OBD_FAIL_LDLM_INTR_CP_AST 0x317 +#define OBD_FAIL_LDLM_CP_BL_RACE 0x318 #define OBD_FAIL_OSC 0x400 #define OBD_FAIL_OSC_BRW_READ_BULK 0x401 diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index e5c49c1..48bb4d2 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -638,6 +638,10 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) LDLM_LOCK_GET(lock); /* dropped by bl thread */ ldlm_lock_remove_from_lru(lock); unlock_res_and_lock(lock); + + if (lock->l_flags & LDLM_FL_FAIL_LOC) + OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); + if ((lock->l_flags & LDLM_FL_ATOMIC_CB) || ldlm_bl_to_thread_lock(ns, NULL, lock) != 0) ldlm_handle_bl_callback(ns, NULL, lock); @@ -1744,7 +1748,7 @@ void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos) lock->l_resource->lr_name.name[0], lock->l_resource->lr_name.name[1]); CDEBUG(level, " Req mode: %s, grant mode: %s, rc: %u, read: %d, " - "write: %d flags: %#x\n", ldlm_lockname[lock->l_req_mode], + "write: %d flags: "LPX64"\n", ldlm_lockname[lock->l_req_mode], ldlm_lockname[lock->l_granted_mode], atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers, lock->l_flags); @@ -1792,7 +1796,7 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 level, libcfs_debug_vmsg2(cdls, data->msg_subsys, level, data->msg_file, data->msg_fn, data->msg_line, fmt, args, " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " - "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: " + "res: \?\? rrc=\?\? type: \?\?\? flags: "LPX64" remote: " LPX64" expref: %d pid: %u timeout: %lu\n", lock, lock->l_handle.h_cookie, atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers, @@ -1812,7 +1816,7 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 level, data->msg_fn, data->msg_line, fmt, args, " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64 - "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64 + "] (req "LPU64"->"LPU64") flags: "LPX64" remote: "LPX64 " expref: %d pid: %u timeout %lu\n", lock->l_resource->lr_namespace->ns_name, lock, lock->l_handle.h_cookie, atomic_read(&lock->l_refc), @@ -1836,7 +1840,7 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 level, data->msg_fn, data->msg_line, fmt, args, " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d " - "["LPU64"->"LPU64"] flags: %x remote: "LPX64 + "["LPU64"->"LPU64"] flags: "LPX64" remote: "LPX64 " expref: %d pid: %u timeout: %lu\n", lock->l_resource->lr_namespace->ns_name, lock, lock->l_handle.h_cookie, atomic_read(&lock->l_refc), @@ -1860,7 +1864,7 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 level, data->msg_fn, data->msg_line, fmt, args, " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s " - "flags: %x remote: "LPX64" expref: %d " + "flags: "LPX64" remote: "LPX64" expref: %d " "pid: %u timeout: %lu\n", lock->l_resource->lr_namespace->ns_name, lock, lock->l_handle.h_cookie, @@ -1882,7 +1886,7 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 level, libcfs_debug_vmsg2(cdls, data->msg_subsys, level, data->msg_file, data->msg_fn, data->msg_line, fmt, args, " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " - "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x " + "res: "LPU64"/"LPU64" rrc: %d type: %s flags: "LPX64" " "remote: "LPX64" expref: %d pid: %u timeout %lu\n", lock->l_resource->lr_namespace->ns_name, lock, lock->l_handle.h_cookie, diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 1386afe..dc0dea9 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -1183,7 +1183,7 @@ existing_lock: if (!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) || !(dlm_rep->lock_flags & LDLM_FL_CANCEL_ON_BLOCK)) { CERROR("Granting sync lock to libclient. " - "req fl %d, rep fl %d, lock fl %d\n", + "req fl %d, rep fl %d, lock fl "LPX64"\n", dlm_req->lock_flags, dlm_rep->lock_flags, lock->l_flags); LDLM_ERROR(lock, "sync lock"); @@ -1751,15 +1751,21 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) RETURN(0); } + if ((lock->l_flags & LDLM_FL_FAIL_LOC) && + lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) + OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); + /* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */ lock_res_and_lock(lock); lock->l_flags |= (dlm_req->lock_flags & LDLM_AST_FLAGS); if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) { - /* If somebody cancels locks and cache is already droped, + /* If somebody cancels lock and cache is already droped, + * or lock is failed before cp_ast received on client, * we can tell the server we have no lock. Otherwise, we * should send cancel after dropping the cache. */ - if ((lock->l_flags & LDLM_FL_CANCELING) && - (lock->l_flags & LDLM_FL_BL_DONE)) { + if (((lock->l_flags & LDLM_FL_CANCELING) && + (lock->l_flags & LDLM_FL_BL_DONE)) || + (lock->l_flags & LDLM_FL_FAILED)) { LDLM_DEBUG(lock, "callback on lock " LPX64" - lock disappeared\n", dlm_req->lock_handle[0].cookie); diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 672063a..e318e88 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -188,8 +188,17 @@ noreproc: spin_unlock(&imp->imp_lock); } - /* Go to sleep until the lock is granted or cancelled. */ - rc = l_wait_event(lock->l_waitq, is_granted_or_cancelled(lock), &lwi); + if (ns_is_client(lock->l_resource->lr_namespace) && + lock->l_resource->lr_type == LDLM_EXTENT && + OBD_FAIL_CHECK(OBD_FAIL_LDLM_INTR_CP_AST | OBD_FAIL_ONCE)) { + obd_fail_loc = OBD_FAIL_LDLM_CP_BL_RACE | OBD_FAIL_ONCE; + lock->l_flags |= LDLM_FL_FAIL_LOC; + rc = -EINTR; + } else { + /* Go to sleep until the lock is granted or cancelled. */ + rc = l_wait_event(lock->l_waitq, + is_granted_or_cancelled(lock), &lwi); + } if (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED) { LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed"); @@ -350,13 +359,31 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns, struct ldlm_lock *lock, struct lustre_handle *lockh, int mode) { + int need_cancel = 0; + /* Set a flag to prevent us from sending a CANCEL (bug 407) */ lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_LOCAL_ONLY; + /* Check that lock is not granted or failed, we might race. */ + if ((lock->l_req_mode != lock->l_granted_mode) && + !(lock->l_flags & LDLM_FL_FAILED)) { + /* Make sure that this lock will not be found by raced + * bl_ast and -EINVAL reply is sent to server anyways. + * bug 17645 */ + lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED | + LDLM_FL_ATOMIC_CB; + need_cancel = 1; + } unlock_res_and_lock(lock); - LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY"); - - ldlm_lock_decref_and_cancel(lockh, mode); + + if (need_cancel) { + LDLM_DEBUG(lock, + "setting FL_LOCAL_ONLY | LDLM_FL_FAILED | " + "LDLM_FL_ATOMIC_CB"); + ldlm_lock_decref_and_cancel(lockh, mode); + } else { + LDLM_DEBUG(lock, "lock was granted or failed in race"); + ldlm_lock_decref(lockh, mode); + } /* XXX - HACK because we shouldn't call ldlm_lock_destroy() * from llite/file.c/ll_file_flock(). */ diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index 2181fe1..f8844e6 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -736,6 +736,48 @@ test_33() { #16129 } run_test 33 "no lock timeout under IO" +test_34() { # bug 17645 + local generation=[] + local count=0 + for imp in /proc/fs/lustre/osc/$FSNAME-OST*-osc-*; do + g=$(awk '/generation/{print $2}' $imp/import) + generation[count]=$g + let count=count+1 + done + + dd if=/dev/zero of=$MOUNT1/$tfile bs=1M count=10 + sync + cancel_lru_locks osc + + # Let's get some read locks so that later we have something to + # conflict with + dd if=$MOUNT1/$tfile of=$MOUNT1/${tfile}-1 bs=1k count=10000 + + # Let's initiate -EINTR situation by setting fail_loc and take + # write lock on same file from same client. This will not cause + # bl_ast yet as lock is already in local cache. +#define OBD_FAIL_LDLM_INTR_CP_AST 0x317 + do_facet client "lctl set_param fail_loc=0x80000317" + dd if=$MOUNT1/${tfile}-1 of=$MOUNT1/$tfile bs=1k count=10000 & + sleep 1 + + # Let's take write lock on same file from another mount. This + # should cause conflict and bl_ast + dd if=$MOUNT2/${tfile}-1 of=$MOUNT2/$tfile bs=1k count=10000 & + wait + do_facet client "lctl set_param fail_loc=0x0" + df -h $MOUNT1 $MOUNT2 + count=0 + for imp in /proc/fs/lustre/osc/$FSNAME-OST*-osc-*; do + g=$(awk '/generation/{print $2}' $imp/import) + if ! test "$g" -eq "${generation[count]}"; then + error "Eviction happened on import $(basename $imp)" + fi + let count=count+1 + done +} +run_test 34 "-EINTR cp_ast vs. bl_ast race does not evict client" + log "cleanup: ======================================================" check_and_cleanup_lustre -- 1.8.3.1