From: Alexander Zarochentsev Date: Thu, 9 Jan 2020 17:45:56 +0000 (+0300) Subject: LU-13128 osc: glimpse and lock cancel race X-Git-Tag: 2.13.52~30 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=7c99f67d9d39e8a037e830cf08a9df305e6d8da2;hp=6dc37759cfb22727ac5d776c38b72e8638563fd8 LU-13128 osc: glimpse and lock cancel race osc_dlm_blocking_ast0 clears l_ast_data before writing the file data to the OST, which opens a race window: during that window neither a glimpse AST nor ldlm_cb_interpret can find the correct file attributes. Fix this by clearing l_ast_data later, under the resource lock taken for the kms update, in both osc_dlm_blocking_ast0 and mdc_dlm_blocking_ast0. Cray-bug-id: LUS-8344 Signed-off-by: Alexander Zarochentsev Change-Id: Iadac4f7da94b71639430c9a7cdd77d55e7ba2849 Reviewed-on: https://review.whamcloud.com/37215 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andrew Perepechko Reviewed-by: Andriy Skulysh Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 5cf86de..b23a6a7 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -417,6 +417,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OSC_CONNECT_GRANT_PARAM 0x413 #define OBD_FAIL_OSC_DELAY_IO 0x414 #define OBD_FAIL_OSC_NO_SIZE_DATA 0x415 +#define OBD_FAIL_OSC_DELAY_CANCEL 0x416 #define OBD_FAIL_PTLRPC 0x500 #define OBD_FAIL_PTLRPC_ACK 0x501 diff --git a/lustre/mdc/mdc_dev.c b/lustre/mdc/mdc_dev.c index 54ccb02..6ab9bca 100644 --- a/lustre/mdc/mdc_dev.c +++ b/lustre/mdc/mdc_dev.c @@ -323,7 +323,6 @@ static int mdc_dlm_blocking_ast0(const struct lu_env *env, if (dlmlock->l_ast_data != NULL) { obj = osc2cl(dlmlock->l_ast_data); - dlmlock->l_ast_data = NULL; cl_object_get(obj); } unlock_res_and_lock(dlmlock); @@ -341,6 +340,7 @@ static int mdc_dlm_blocking_ast0(const struct lu_env *env, */ /* losing a lock, update kms */ lock_res_and_lock(dlmlock); + dlmlock->l_ast_data = NULL; cl_object_attr_lock(obj); attr->cat_kms = 0; cl_object_attr_update(env, obj, attr, CAT_KMS); diff --git a/lustre/osc/osc_lock.c b/lustre/osc/osc_lock.c index
85ab132..d304fe10 100644 --- a/lustre/osc/osc_lock.c +++ b/lustre/osc/osc_lock.c @@ -423,13 +423,13 @@ static int osc_dlm_blocking_ast0(const struct lu_env *env, if (dlmlock->l_ast_data != NULL) { obj = osc2cl(dlmlock->l_ast_data); - dlmlock->l_ast_data = NULL; - cl_object_get(obj); } unlock_res_and_lock(dlmlock); + OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_CANCEL, 5); + /* if l_ast_data is NULL, the dlmlock was enqueued by AGL or * the object has been destroyed. */ if (obj != NULL) { @@ -445,6 +445,9 @@ static int osc_dlm_blocking_ast0(const struct lu_env *env, /* losing a lock, update kms */ lock_res_and_lock(dlmlock); + /* clearing l_ast_data after flushing data, + * to let glimpse ast find the lock and the object */ + dlmlock->l_ast_data = NULL; cl_object_attr_lock(obj); /* Must get the value under the lock to avoid race. */ old_kms = cl2osc(obj)->oo_oinfo->loi_kms; diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 8ee25b7..c251532 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -4913,6 +4913,17 @@ test_104() { } run_test 104 "Verify that MDS stores atime/mtime/ctime during close" +test_105() { + test_mkdir -p $DIR/$tdir + echo test > $DIR/$tdir/$tfile + $LCTL set_param fail_loc=0x416 + cancel_lru_locks osc & sleep 1 + fsize1=$(stat -c %s $DIR2/$tdir/$tfile) + wait + [[ $fsize1 = 5 ]] || error "Glimpse returned wrong file size $fsize1" +} +run_test 105 "Glimpse and lock cancel race" + log "cleanup: ======================================================" # kill and wait in each test only guarentee script finish, but command in script