Whamcloud - gitweb
LU-14687 llite: Return errors for aio
author Patrick Farrell <pfarrell@whamcloud.com>
Thu, 15 Jul 2021 21:47:54 +0000 (17:47 -0400)
committer Andreas Dilger <adilger@whamcloud.com>
Fri, 6 Aug 2021 02:51:22 +0000 (02:51 +0000)
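The AIO code incorrectly discards errors from ll_direct_rw_pages()
by unconditionally overwriting the result with -EIOCBQUEUED.  Only
return -EIOCBQUEUED when no error occurred, and add a test
(sanity test_398f, using the new OBD_FAIL_LLITE_PAGE_ALLOC fail_loc)
to cover this path.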

Lustre-change: https://review.whamcloud.com/43722
Lustre-commit: 3e1f8d30cb0209b35410e85e502e2cae40f1b58c

Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: I49dadd0b3692820687fa6a1339e00516edf7a5d5
Reviewed-on: https://review.whamcloud.com/43722
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Arshad Hussain <arshad.hussain@aeoncomputing.com>
Reviewed-by: Wang Shilong <wshilong@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/44323
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
lustre/include/obd_support.h
lustre/llite/rw26.c
lustre/obdclass/cl_page.c
lustre/tests/sanity.sh

index e911616..b52ef0c 100644 (file)
@@ -592,6 +592,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LLITE_PCC_ATTACH_PAUSE                    0x1414
 #define OBD_FAIL_LLITE_SHORT_COMMIT                0x1415
 #define OBD_FAIL_LLITE_CREATE_FILE_PAUSE2          0x1416
+#define OBD_FAIL_LLITE_PAGE_ALLOC                  0x1418
 
 #define OBD_FAIL_FID_INDIR     0x1501
 #define OBD_FAIL_FID_INLMA     0x1502
index b332d08..9c4ea81 100644 (file)
@@ -543,7 +543,8 @@ out:
                        vio->u.readwrite.vui_written += tot_bytes;
                else
                        vio->u.readwrite.vui_read += tot_bytes;
-               result = -EIOCBQUEUED;
+               if (result == 0)
+                       result = -EIOCBQUEUED;
        }
 
        return result;
index 2a8df48..08daf51 100644 (file)
@@ -216,6 +216,9 @@ static struct cl_page *__cl_page_alloc(struct cl_object *o)
        struct cl_page *cl_page = NULL;
        unsigned short bufsize = cl_object_header(o)->coh_page_bufsize;
 
+       if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_PAGE_ALLOC))
+               return NULL;
+
 check:
        /* the number of entries in cl_page_kmem_array is expected to
         * only be 2-3 entries, so the lookup overhead should be low.
index 9c5f558..7d3f95d 100755 (executable)
@@ -22787,7 +22787,7 @@ test_398b() { # LU-4198
                --filename=$DIR/$tfile || true
        wait $bg_pid
 
-       rm -rf $DIR/$tfile
+       rm -f $DIR/$tfile
 }
 run_test 398b "DIO and buffer IO race"
 
@@ -22847,27 +22847,28 @@ test_398c() { # LU-4198
                --filename=$DIR/$tfile
        [ $? -eq 0 ] || error "fio large block size failed"
 
-       rm -rf $DIR/$tfile
+       rm -f $DIR/$tfile
        $LCTL set_param debug="$saved_debug"
 }
 run_test 398c "run fio to test AIO"
 
 test_398d() { #  LU-13846
-       test -f aiocp || skip_env "no aiocp installed"
-       local aio_file=$DIR/aio_file
+       which aiocp || skip_env "no aiocp installed"
+       local aio_file=$DIR/$tfile.aio
 
        $LFS setstripe -c -1 -S 1M $DIR/$tfile $aio_file
 
        dd if=/dev/urandom of=$DIR/$tfile bs=1M count=64
        aiocp -a $PAGE_SIZE -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file
+       stack_trap "rm -f $DIR/$tfile $aio_file"
 
-       diff $DIR/$tfile $aio_file || "file diff after aiocp"
+       diff $DIR/$tfile $aio_file || error "file diff after aiocp"
 
        # make sure we don't crash and fail properly
        aiocp -a 512 -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file &&
                error "aio not aligned with PAGE SIZE should fail"
 
-       rm -rf $DIR/$tfile $aio_file
+       rm -f $DIR/$tfile $aio_file
 }
 run_test 398d "run aiocp to verify block size > stripe size"
 
@@ -22878,6 +22879,26 @@ test_398e() {
 }
 run_test 398e "O_Direct open cleared by fcntl doesn't cause hang"
 
+test_398f() { #  LU-14687
+       which aiocp || skip_env "no aiocp installed"
+       local aio_file=$DIR/$tfile.aio
+
+       $LFS setstripe -c -1 -S 1M $DIR/$tfile $aio_file
+
+       dd if=/dev/zero of=$DIR/$tfile bs=1M count=64
+       stack_trap "rm -f $DIR/$tfile $aio_file"
+
+       #define OBD_FAIL_LLITE_PAGE_ALLOC 0x1418
+       $LCTL set_param fail_loc=0x1418
+       # make sure we don't crash and fail properly
+       aiocp -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file &&
+               error "aio with page allocation failure succeeded"
+       $LCTL set_param fail_loc=0
+       diff $DIR/$tfile $aio_file
+       [[ $? != 0 ]] || error "no diff after failed aiocp"
+}
+run_test 398f "verify aio handles ll_direct_rw_pages errors correctly"
+
 test_fake_rw() {
        local read_write=$1
        if [ "$read_write" = "write" ]; then