From d2f68db4f42ae2db0292b177922d5c4c926a5396 Mon Sep 17 00:00:00 2001 From: "Alexander.Boyko" Date: Tue, 3 Dec 2013 10:00:22 +0400 Subject: [PATCH] LU-3889 osc: Allow lock to be canceled at ENQ time A cl_lock can be canceled when it's in CLS_ENQUEUED state. We can't unuse this kind of lock in lov_lock_unuse() because it will bring this lock into CLS_NEW state and then confuse osc_lock_upcall(). Add a regression test case by Alexander Boyko. Signed-off-by: Jinshan Xiong Signed-off-by: Bob Glossman Signed-off-by: Alexander Boyko Change-Id: Ieb9209203a363c9098b9ea4637a9d12e12ef1aa4 Reviewed-on: http://review.whamcloud.com/8717 Tested-by: Jenkins Reviewed-by: Bobi Jam Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/lov/lov_lock.c | 32 ++++++++++++++++++++------------ lustre/osc/osc_request.c | 3 +++ lustre/tests/sanityn.sh | 22 ++++++++++++++++++++++ 4 files changed, 46 insertions(+), 12 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 45d8e95..e7e4205 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -341,6 +341,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_LDLM_AGL_DELAY 0x31a #define OBD_FAIL_LDLM_AGL_NOLOCK 0x31b #define OBD_FAIL_LDLM_OST_LVB 0x31c +#define OBD_FAIL_LDLM_ENQUEUE_HANG 0x31d /* LOCKLESS IO */ #define OBD_FAIL_LDLM_SET_CONTENTION 0x385 diff --git a/lustre/lov/lov_lock.c b/lustre/lov/lov_lock.c index 547bdca..39a4aaf 100644 --- a/lustre/lov/lov_lock.c +++ b/lustre/lov/lov_lock.c @@ -709,18 +709,26 @@ static int lov_lock_unuse(const struct lu_env *env, if (sub == NULL) continue; - sublock = sub->lss_cl.cls_lock; - rc = lov_sublock_lock(env, lck, lls, closure, &subenv); - if (rc == 0) { - if (lls->sub_flags & LSF_HELD) { - LASSERT(sublock->cll_state == CLS_HELD || - sublock->cll_state == CLS_ENQUEUED); - rc = cl_unuse_try(subenv->lse_env, sublock); - rc = lov_sublock_release(env, lck, i, 0, rc); 
- } - lov_sublock_unlock(env, sub, closure, subenv); - } - result = lov_subresult(result, rc); + sublock = sub->lss_cl.cls_lock; + rc = lov_sublock_lock(env, lck, lls, closure, &subenv); + if (rc == 0) { + if (!(lls->sub_flags & LSF_HELD)) { + lov_sublock_unlock(env, sub, closure, subenv); + continue; + } + + switch(sublock->cll_state) { + case CLS_HELD: + rc = cl_unuse_try(subenv->lse_env, sublock); + lov_sublock_release(env, lck, i, 0, 0); + break; + default: + lov_sublock_release(env, lck, i, 1, 0); + break; + } + lov_sublock_unlock(env, sub, closure, subenv); + } + result = lov_subresult(result, rc); } if (result == 0 && lck->lls_cancel_race) { diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 582ba19..d9e7839 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -2399,6 +2399,9 @@ static int osc_enqueue_interpret(const struct lu_env *env, * osc_enqueue_fini(). */ ldlm_lock_addref(&handle, mode); + /* Let cl_lock_state_wait fail with -ERESTARTSYS to unuse sublocks. */ + OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_HANG, 2); + /* Let CP AST to grant the lock first. */ OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1); diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 1ccc239..57bfe15 100644 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -2546,6 +2546,28 @@ test_74() { } run_test 74 "flock deadlock: different mounts ==============" +# LU-3889 +test_75() { + $LFS setstripe -c 2 -S 1m -i 0 $DIR1/$tfile + dd if=/dev/zero of=$DIR1/$tfile bs=1M count=2 + cancel_lru_locks osc + + dd of=$DIR1/$tfile if=/dev/zero bs=1M count=1 seek=1 conv=notrunc + sync + + # define OBD_FAIL_LDLM_ENQUEUE_HANG 0x31d + $LCTL set_param fail_loc=0x31d + stat -c %s $DIR1/$tfile & + local pid=$! 
+ sleep 1 + kill -9 $pid + + # With a buggy lock error handler we would hit an ASSERT and get a kernel panic here + sleep 4 + $LCTL set_param fail_loc=0 +} +run_test 75 "osc: upcall after unuse lock===================" + log "cleanup: ======================================================" [ "$(mount | grep $MOUNT2)" ] && umount $MOUNT2 -- 1.8.3.1