Whamcloud - gitweb
LU-13128 osc: glimpse and lock cancel race 15/37215/5
author Alexander Zarochentsev <c17826@cray.com>
Thu, 9 Jan 2020 17:45:56 +0000 (20:45 +0300)
committer Oleg Drokin <green@whamcloud.com>
Sat, 8 Feb 2020 03:59:41 +0000 (03:59 +0000)
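osc_dlm_blocking_ast0() clears l_ast_data before the file data
has been written to the OST, which opens a race window: during
that window neither a glimpse AST nor ldlm_cb_interpret can find
the correct file attributes. Clear l_ast_data only after the data
has been flushed, so that a glimpse AST can still find the lock
and the object.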

Cray-bug-id: LUS-8344
Signed-off-by: Alexander Zarochentsev <c17826@cray.com>
Change-Id: Iadac4f7da94b71639430c9a7cdd77d55e7ba2849
Reviewed-on: https://review.whamcloud.com/37215
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andrew Perepechko <c17827@cray.com>
Reviewed-by: Andriy Skulysh <c17819@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/mdc/mdc_dev.c
lustre/osc/osc_lock.c
lustre/tests/sanityn.sh

index 5cf86de..b23a6a7 100644 (file)
@@ -417,6 +417,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OSC_CONNECT_GRANT_PARAM 0x413
 #define OBD_FAIL_OSC_DELAY_IO            0x414
 #define OBD_FAIL_OSC_NO_SIZE_DATA        0x415
 #define OBD_FAIL_OSC_CONNECT_GRANT_PARAM 0x413
 #define OBD_FAIL_OSC_DELAY_IO            0x414
 #define OBD_FAIL_OSC_NO_SIZE_DATA        0x415
+#define OBD_FAIL_OSC_DELAY_CANCEL        0x416
 
 #define OBD_FAIL_PTLRPC                  0x500
 #define OBD_FAIL_PTLRPC_ACK              0x501
 
 #define OBD_FAIL_PTLRPC                  0x500
 #define OBD_FAIL_PTLRPC_ACK              0x501
index 54ccb02..6ab9bca 100644 (file)
@@ -323,7 +323,6 @@ static int mdc_dlm_blocking_ast0(const struct lu_env *env,
 
        if (dlmlock->l_ast_data != NULL) {
                obj = osc2cl(dlmlock->l_ast_data);
 
        if (dlmlock->l_ast_data != NULL) {
                obj = osc2cl(dlmlock->l_ast_data);
-               dlmlock->l_ast_data = NULL;
                cl_object_get(obj);
        }
        unlock_res_and_lock(dlmlock);
                cl_object_get(obj);
        }
        unlock_res_and_lock(dlmlock);
@@ -341,6 +340,7 @@ static int mdc_dlm_blocking_ast0(const struct lu_env *env,
                 */
                /* losing a lock, update kms */
                lock_res_and_lock(dlmlock);
                 */
                /* losing a lock, update kms */
                lock_res_and_lock(dlmlock);
+               dlmlock->l_ast_data = NULL;
                cl_object_attr_lock(obj);
                attr->cat_kms = 0;
                cl_object_attr_update(env, obj, attr, CAT_KMS);
                cl_object_attr_lock(obj);
                attr->cat_kms = 0;
                cl_object_attr_update(env, obj, attr, CAT_KMS);
index 85ab132..d304fe1 100644 (file)
@@ -423,13 +423,13 @@ static int osc_dlm_blocking_ast0(const struct lu_env *env,
 
        if (dlmlock->l_ast_data != NULL) {
                obj = osc2cl(dlmlock->l_ast_data);
 
        if (dlmlock->l_ast_data != NULL) {
                obj = osc2cl(dlmlock->l_ast_data);
-               dlmlock->l_ast_data = NULL;
-
                cl_object_get(obj);
        }
 
        unlock_res_and_lock(dlmlock);
 
                cl_object_get(obj);
        }
 
        unlock_res_and_lock(dlmlock);
 
+       OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_CANCEL, 5);
+
        /* if l_ast_data is NULL, the dlmlock was enqueued by AGL or
         * the object has been destroyed. */
        if (obj != NULL) {
        /* if l_ast_data is NULL, the dlmlock was enqueued by AGL or
         * the object has been destroyed. */
        if (obj != NULL) {
@@ -445,6 +445,9 @@ static int osc_dlm_blocking_ast0(const struct lu_env *env,
 
                /* losing a lock, update kms */
                lock_res_and_lock(dlmlock);
 
                /* losing a lock, update kms */
                lock_res_and_lock(dlmlock);
+               /* clearing l_ast_data after flushing data,
+                * to let glimpse ast find the lock and the object */
+               dlmlock->l_ast_data = NULL;
                cl_object_attr_lock(obj);
                /* Must get the value under the lock to avoid race. */
                old_kms = cl2osc(obj)->oo_oinfo->loi_kms;
                cl_object_attr_lock(obj);
                /* Must get the value under the lock to avoid race. */
                old_kms = cl2osc(obj)->oo_oinfo->loi_kms;
index 8ee25b7..c251532 100755 (executable)
@@ -4913,6 +4913,17 @@ test_104() {
 }
 run_test 104 "Verify that MDS stores atime/mtime/ctime during close"
 
 }
 run_test 104 "Verify that MDS stores atime/mtime/ctime during close"
 
+test_105() {
+       test_mkdir -p $DIR/$tdir
+       echo test > $DIR/$tdir/$tfile
+       $LCTL set_param fail_loc=0x416
+       cancel_lru_locks osc & sleep 1
+       fsize1=$(stat -c %s $DIR2/$tdir/$tfile)
+       wait
+       [[ $fsize1 = 5 ]] ||  error "Glimpse returned wrong file size $fsize1"
+}
+run_test 105 "Glimpse and lock cancel race"
+
 log "cleanup: ======================================================"
 
 # kill and wait in each test only guarantee script finish, but command in script
 log "cleanup: ======================================================"
 
 # kill and wait in each test only guarantee script finish, but command in script