From 13079de3417ce41ce485b2bdad36834e2f6e6e15 Mon Sep 17 00:00:00 2001 From: "Alexander.Boyko" Date: Tue, 3 Dec 2013 10:00:22 +0400 Subject: [PATCH] LU-3889 osc: Allow lock to be canceled at ENQ time A cl_lock can be canceled when it's in CLS_ENQUEUED state. We can't unuse this kind of lock in lov_lock_unuse() because it will bring this lock into CLS_NEW state and then confuse osc_lock_upcall(). Add a regression test case by Alexander Boyko. Signed-off-by: Jinshan Xiong Signed-off-by: Alexander Boyko Change-Id: I2acc7fd0176280062eb0d25dbe929b5d0144db50 Reviewed-on: http://review.whamcloud.com/8405 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Bobi Jam Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/lov/lov_lock.c | 32 ++++++++++++++++++++------------ lustre/osc/osc_request.c | 3 +++ lustre/tests/sanityn.sh | 22 ++++++++++++++++++++++ 4 files changed, 46 insertions(+), 12 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index b4827e0..254a7de 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -343,6 +343,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_LDLM_AGL_DELAY 0x31a #define OBD_FAIL_LDLM_AGL_NOLOCK 0x31b #define OBD_FAIL_LDLM_OST_LVB 0x31c +#define OBD_FAIL_LDLM_ENQUEUE_HANG 0x31d /* LOCKLESS IO */ #define OBD_FAIL_LDLM_SET_CONTENTION 0x385 diff --git a/lustre/lov/lov_lock.c b/lustre/lov/lov_lock.c index 547bdca..39a4aaf 100644 --- a/lustre/lov/lov_lock.c +++ b/lustre/lov/lov_lock.c @@ -709,18 +709,26 @@ static int lov_lock_unuse(const struct lu_env *env, if (sub == NULL) continue; - sublock = sub->lss_cl.cls_lock; - rc = lov_sublock_lock(env, lck, lls, closure, &subenv); - if (rc == 0) { - if (lls->sub_flags & LSF_HELD) { - LASSERT(sublock->cll_state == CLS_HELD || - sublock->cll_state == CLS_ENQUEUED); - rc = cl_unuse_try(subenv->lse_env, sublock); - rc = lov_sublock_release(env, lck, i, 0, rc); - } - 
lov_sublock_unlock(env, sub, closure, subenv); - } - result = lov_subresult(result, rc); + sublock = sub->lss_cl.cls_lock; + rc = lov_sublock_lock(env, lck, lls, closure, &subenv); + if (rc == 0) { + if (!(lls->sub_flags & LSF_HELD)) { + lov_sublock_unlock(env, sub, closure, subenv); + continue; + } + + switch(sublock->cll_state) { + case CLS_HELD: + rc = cl_unuse_try(subenv->lse_env, sublock); + lov_sublock_release(env, lck, i, 0, 0); + break; + default: + lov_sublock_release(env, lck, i, 1, 0); + break; + } + lov_sublock_unlock(env, sub, closure, subenv); + } + result = lov_subresult(result, rc); } if (result == 0 && lck->lls_cancel_race) { diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 09afa3c..0c14415 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -2438,6 +2438,9 @@ static int osc_enqueue_interpret(const struct lu_env *env, * osc_enqueue_fini(). */ ldlm_lock_addref(&handle, mode); + /* Let cl_lock_state_wait fail with -ERESTARTSYS to unuse sublocks. */ + OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_HANG, 2); + /* Let CP AST to grant the lock first. */ OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1); diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 16cfedb..9340b37 100644 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -2549,6 +2549,28 @@ test_74() { } run_test 74 "flock deadlock: different mounts ==============" +# LU-3889 +test_75() { + $LFS setstripe -c 2 -S 1m -i 0 $DIR1/$tfile + dd if=/dev/zero of=$DIR1/$tfile bs=1M count=2 + cancel_lru_locks osc + + dd of=$DIR1/$tfile if=/dev/zero bs=1M count=1 seek=1 conv=notrunc + sync + + # define OBD_FAIL_LDLM_ENQUEUE_HANG 0x31d + $LCTL set_param fail_loc=0x31d + stat -c %s $DIR1/$tfile & + local pid=$! 
+ sleep 1 + kill -9 $pid + + # With a bad lock error handler we would hit an ASSERT and get a kernel panic here + sleep 4 + $LCTL set_param fail_loc=0 +} +run_test 75 "osc: upcall after unuse lock===================" + log "cleanup: ======================================================" [ "$(mount | grep $MOUNT2)" ] && umount $MOUNT2 -- 1.8.3.1