Whamcloud - gitweb
LU-10212 test: ESTALE read 01/31101/7
author: Alexander Boyko <c17825@cray.com>
Wed, 31 Jan 2018 11:17:42 +0000 (06:17 -0500)
committer: Oleg Drokin <oleg.drokin@intel.com>
Tue, 6 Mar 2018 19:13:17 +0000 (19:13 +0000)
The patch reproduces the issue where a read RPC arrives at
the OST with a lock handle that has the LDLM_FL_DESTROYED
flag set, and the client then gets an ESTALE error for the
read operation.

Test-Parameters: trivial testlist=sanity
Signed-off-by: Alexander Boyko <c17825@cray.com>
Cray-bug-id: MRP-4604
Change-Id: I0722fc57a61153b25a05bf7aebce5d7f32bbc95b
Reviewed-on: https://review.whamcloud.com/31101
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Elena Gryaznova <c17455@cray.com>
lustre/include/obd_support.h
lustre/ldlm/ldlm_extent.c
lustre/tests/sanity.sh

index b309437..1e3fd9b 100644 (file)
@@ -374,6 +374,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LDLM_WATERMARK_HIGH    0x328
 
 #define OBD_FAIL_LDLM_GRANT_CHECK        0x32a
 #define OBD_FAIL_LDLM_WATERMARK_HIGH    0x328
 
 #define OBD_FAIL_LDLM_GRANT_CHECK        0x32a
+#define OBD_FAIL_LDLM_PROLONG_PAUSE     0x32b
 
 /* LOCKLESS IO */
 #define OBD_FAIL_LDLM_SET_CONTENTION     0x385
 
 /* LOCKLESS IO */
 #define OBD_FAIL_LDLM_SET_CONTENTION     0x385
index 35a4c46..623563c 100644 (file)
@@ -678,6 +678,8 @@ void ldlm_lock_prolong_one(struct ldlm_lock *lock,
 {
        time64_t timeout;
 
 {
        time64_t timeout;
 
+       OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_PROLONG_PAUSE, 3);
+
        if (arg->lpa_export != lock->l_export ||
            lock->l_flags & LDLM_FL_DESTROYED)
                /* ignore unrelated locks */
        if (arg->lpa_export != lock->l_export ||
            lock->l_flags & LDLM_FL_DESTROYED)
                /* ignore unrelated locks */
index 4fc5ac2..715a35f 100755 (executable)
@@ -16227,6 +16227,39 @@ test_271c() {
 }
 run_test 271c "DoM: IO lock at open saves enqueue RPCs"
 
 }
 run_test 271c "DoM: IO lock at open saves enqueue RPCs"
 
+test_275() {
+       remote_ost_nodsh && skip "remote OST with nodsh" && return
+       [ $(lustre_version_code ost1) -lt $(version_code 2.10.57) ] &&
+               skip "Need OST version >= 2.10.57" && return 0
+
+       local file=$DIR/$tfile
+       local oss
+
+       oss=$(comma_list $(osts_nodes))
+
+       dd if=/dev/urandom of=$file bs=1M count=2 ||
+               error "failed to create a file"
+       cancel_lru_locks osc
+
+       #lock 1
+       dd if=$file of=/dev/null bs=1M count=1 iflag=direct ||
+               error "failed to read a file"
+
+#define OBD_FAIL_LDLM_PAUSE_CANCEL2      0x31f
+       $LCTL set_param fail_loc=0x8000031f
+
+       cancel_lru_locks osc &
+       sleep 1
+
+#define OBD_FAIL_LDLM_PROLONG_PAUSE      0x32b
+       do_nodes $oss $LCTL set_param fail_loc=0x8000032b
+       #IO takes another lock, but matches the PENDING one
+       #and places it to the IO RPC
+       dd if=$file of=/dev/null bs=1M count=1 iflag=direct ||
+               error "failed to read a file with PENDING lock"
+}
+run_test 275 "Read on a canceled duplicate lock"
+
 test_276() {
        remote_ost_nodsh && skip "remote OST with nodsh" && return
        local pid
 test_276() {
        remote_ost_nodsh && skip "remote OST with nodsh" && return
        local pid