Whamcloud - gitweb
LU-13900 clio: don't call aio_complete() in lustre upon errors 36/39636/7
authorWang Shilong <wshilong@ddn.com>
Tue, 11 Aug 2020 15:17:46 +0000 (23:17 +0800)
committerOleg Drokin <green@whamcloud.com>
Sat, 19 Sep 2020 14:12:08 +0000 (14:12 +0000)
See the following code in aio_ret():
"
/*
 * Translate the result of an AIO submission into the value handed back
 * to the submitter.
 *
 * -EIOCBQUEUED is passed through untouched and aio_complete() is NOT
 * called here.  For every other value of ret, this function itself calls
 * aio_complete() on req and then returns 0.
 */
static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret)
{
        switch (ret) {
        case -EIOCBQUEUED:
                /* Only case in which aio_complete() is not called below. */
                return ret;
        case -ERESTARTSYS:
        case -ERESTARTNOINTR:
        case -ERESTARTNOHAND:
        case -ERESTART_RESTARTBLOCK:
                /*
                 * There's no easy way to restart the
                 * syscall since other AIO's may be already running.
                 * Just fail this IO with EINTR.
                 */
                ret = -EINTR;
                /*FALLTHRU*/
        default:
                /*
                 * Any non-queued result (success or error) is completed
                 * right here with that result.
                 */
                aio_complete(req, ret, 0);
                return 0;
        }
}
"
The VFS calls aio_complete() whenever ret is not -EIOCBQUEUED. This can
happen when the user buffer is not page-aligned, or when some other
error occurs in Lustre.

So in Lustre we need to handle this case carefully to avoid calling
aio_complete() twice for the same request.

Fixes: d1dde ("LU-4198 clio: AIO support for direct IO")
Signed-off-by: Wang Shilong <wshilong@ddn.com>
Change-Id: I2137844437e91a4e269f7584c3a233e5fc81b0df
Reviewed-on: https://review.whamcloud.com/39636
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Yingjin Qian <qian@ddn.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/cl_object.h
lustre/llite/file.c
lustre/obdclass/cl_io.c
lustre/tests/sanity.sh

index bbc9f72..6c9a212 100644 (file)
@@ -2556,6 +2556,7 @@ struct cl_dio_aio {
        struct cl_page_list     cda_pages;
        struct kiocb            *cda_iocb;
        ssize_t                 cda_bytes;
+       unsigned                cda_no_aio_complete:1;
 };
 
 /** @} cl_sync_io */
index d68c232..927fe8b 100644 (file)
@@ -1668,6 +1668,13 @@ out:
        }
 
        if (io->ci_aio) {
+               /*
+                * VFS will call aio_complete() if no -EIOCBQUEUED
+                * is returned for AIO, so we can not call aio_complete()
+                * in our end_io().
+                */
+               if (rc != -EIOCBQUEUED)
+                       io->ci_aio->cda_no_aio_complete = 1;
                /**
                 * Drop one extra reference so that end_io() could be
                 * called for this IO context, we could call it after
index 705aa89..dc4f58f 100644 (file)
@@ -1185,7 +1185,7 @@ static void cl_aio_end(const struct lu_env *env, struct cl_sync_io *anchor)
                cl_page_put(env, page);
        }
 
-       if (!is_sync_kiocb(aio->cda_iocb))
+       if (!is_sync_kiocb(aio->cda_iocb) && !aio->cda_no_aio_complete)
                aio_complete(aio->cda_iocb, ret ?: aio->cda_bytes, 0);
 
        EXIT;
@@ -1205,6 +1205,7 @@ struct cl_dio_aio *cl_aio_alloc(struct kiocb *iocb)
                                       NULL : aio, cl_aio_end);
                cl_page_list_init(&aio->cda_pages);
                aio->cda_iocb = iocb;
+               aio->cda_no_aio_complete = 0;
        }
        return aio;
 }
index 207d6b5..cd1b832 100755 (executable)
@@ -22048,6 +22048,11 @@ test_398d() { #  LU-13846
        aiocp -a $PAGE_SIZE -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file
 
        diff $DIR/$tfile $aio_file || "file diff after aiocp"
+
+       # make sure we don't crash and fail properly
+       aiocp -a 512 -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file &&
+               error "aio not aligned with PAGE SIZE should fail"
+
        rm -rf $DIR/$tfile $aio_file
 }
 run_test 398d "run aiocp to verify block size > stripe size"