Whamcloud - gitweb
LU-14687 llite: Return errors for aio 22/43722/7
author Patrick Farrell <farr0186@gmail.com>
Wed, 19 May 2021 18:08:57 +0000 (14:08 -0400)
committer Oleg Drokin <green@whamcloud.com>
Wed, 2 Jun 2021 17:48:04 +0000 (17:48 +0000)
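The aio code incorrectly discards errors from
ll_direct_rw_pages: the result was always overwritten with
-EIOCBQUEUED, even when it already held an error.  Only set
-EIOCBQUEUED when the result is 0, and add a test (sanity
test_398f) that injects a page allocation failure through the new
OBD_FAIL_LLITE_PAGE_ALLOC fail_loc to verify that the error is
returned.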

Signed-off-by: Patrick Farrell <farr0186@gmail.com>
Change-Id: I49dadd0b3692820687fa6a1339e00516edf7a5d5
Reviewed-on: https://review.whamcloud.com/43722
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Arshad Hussain <arshad.hussain@aeoncomputing.com>
Reviewed-by: Wang Shilong <wshilong@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/llite/rw26.c
lustre/obdclass/cl_page.c
lustre/tests/sanity.sh

index d24d3f1..87d8abd 100644 (file)
@@ -596,6 +596,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LLITE_SHORT_COMMIT                0x1415
 #define OBD_FAIL_LLITE_CREATE_FILE_PAUSE2          0x1416
 #define OBD_FAIL_LLITE_RACE_MOUNT                  0x1417
+#define OBD_FAIL_LLITE_PAGE_ALLOC                  0x1418
 
 #define OBD_FAIL_FID_INDIR     0x1501
 #define OBD_FAIL_FID_INLMA     0x1502
index 9a59996..6fdeba9 100644 (file)
@@ -542,7 +542,8 @@ out:
                        vio->u.readwrite.vui_written += tot_bytes;
                else
                        vio->u.readwrite.vui_read += tot_bytes;
-               result = -EIOCBQUEUED;
+               if (result == 0)
+                       result = -EIOCBQUEUED;
        }
 
        return result;
index 1d1c375..e345958 100644 (file)
@@ -215,6 +215,9 @@ static struct cl_page *__cl_page_alloc(struct cl_object *o)
        struct cl_page *cl_page = NULL;
        unsigned short bufsize = cl_object_header(o)->coh_page_bufsize;
 
+       if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_PAGE_ALLOC))
+               return NULL;
+
 check:
        /* the number of entries in cl_page_kmem_array is expected to
         * only be 2-3 entries, so the lookup overhead should be low.
index 4e9d58f..157b568 100755 (executable)
@@ -23211,7 +23211,7 @@ test_398b() { # LU-4198
                --filename=$DIR/$tfile || true
        wait $bg_pid
 
-       rm -rf $DIR/$tfile
+       rm -f $DIR/$tfile
 }
 run_test 398b "DIO and buffer IO race"
 
@@ -23271,27 +23271,28 @@ test_398c() { # LU-4198
                --filename=$DIR/$tfile
        [ $? -eq 0 ] || error "fio large block size failed"
 
-       rm -rf $DIR/$tfile
+       rm -f $DIR/$tfile
        $LCTL set_param debug="$saved_debug"
 }
 run_test 398c "run fio to test AIO"
 
 test_398d() { #  LU-13846
-       test -f aiocp || skip_env "no aiocp installed"
-       local aio_file=$DIR/aio_file
+       which aiocp || skip_env "no aiocp installed"
+       local aio_file=$DIR/$tfile.aio
 
        $LFS setstripe -c -1 -S 1M $DIR/$tfile $aio_file
 
        dd if=/dev/urandom of=$DIR/$tfile bs=1M count=64
        aiocp -a $PAGE_SIZE -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file
+       stack_trap "rm -f $DIR/$tfile $aio_file"
 
-       diff $DIR/$tfile $aio_file || "file diff after aiocp"
+       diff $DIR/$tfile $aio_file || error "file diff after aiocp"
 
        # make sure we don't crash and fail properly
        aiocp -a 512 -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file &&
                error "aio not aligned with PAGE SIZE should fail"
 
-       rm -rf $DIR/$tfile $aio_file
+       rm -f $DIR/$tfile $aio_file
 }
 run_test 398d "run aiocp to verify block size > stripe size"
 
@@ -23302,6 +23303,26 @@ test_398e() {
 }
 run_test 398e "O_Direct open cleared by fcntl doesn't cause hang"
 
+test_398f() { #  LU-14687
+       which aiocp || skip_env "no aiocp installed"
+       local aio_file=$DIR/$tfile.aio
+
+       $LFS setstripe -c -1 -S 1M $DIR/$tfile $aio_file
+
+       dd if=/dev/zero of=$DIR/$tfile bs=1M count=64
+       stack_trap "rm -f $DIR/$tfile $aio_file"
+
+       #define OBD_FAIL_LLITE_PAGE_ALLOC 0x1418
+       $LCTL set_param fail_loc=0x1418
+       # make sure we don't crash and fail properly
+       aiocp -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file &&
+               error "aio with page allocation failure succeeded"
+       $LCTL set_param fail_loc=0
+       diff $DIR/$tfile $aio_file
+       [[ $? != 0 ]] || error "no diff after failed aiocp"
+}
+run_test 398f "verify aio handles ll_direct_rw_pages errors correctly"
+
 test_fake_rw() {
        local read_write=$1
        if [ "$read_write" = "write" ]; then