From 2fb8444b5a636956281f180a04845502e7e99691 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 11 Aug 2020 23:17:46 +0800 Subject: [PATCH] LU-13900 clio: don't call aio_complete() in lustre upon errors See the following code in aio_ret(): " static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret) { switch (ret) { case -EIOCBQUEUED: return ret; case -ERESTARTSYS: case -ERESTARTNOINTR: case -ERESTARTNOHAND: case -ERESTART_RESTARTBLOCK: /* * There's no easy way to restart the * syscall since other AIO's may be already running. * Just fail this IO with EINTR. */ ret = -EINTR; /*FALLTHRU*/ default: aio_complete(req, ret, 0); return 0; } } " VFS will call aio_complete() if ret is not -EIOCBQUEUED; this can happen when the user buffer passed in is not page-aligned, or when some other error occurs in Lustre. So in Lustre we need to be careful to handle this case, to avoid calling aio_complete() twice. Fixes: d1dde ("LU-4198 clio: AIO support for direct IO") Signed-off-by: Wang Shilong Change-Id: I2137844437e91a4e269f7584c3a233e5fc81b0df Reviewed-on: https://review.whamcloud.com/39636 Reviewed-by: Bobi Jam Tested-by: jenkins Reviewed-by: Yingjin Qian Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/cl_object.h | 1 + lustre/llite/file.c | 7 +++++++ lustre/obdclass/cl_io.c | 3 ++- lustre/tests/sanity.sh | 5 +++++ 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index bbc9f72..6c9a212 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -2556,6 +2556,7 @@ struct cl_dio_aio { struct cl_page_list cda_pages; struct kiocb *cda_iocb; ssize_t cda_bytes; + unsigned cda_no_aio_complete:1; }; /** @} cl_sync_io */ diff --git a/lustre/llite/file.c b/lustre/llite/file.c index d68c232..927fe8b 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -1668,6 +1668,13 @@ out: } if (io->ci_aio) { + /* + * VFS will call aio_complete() if no -EIOCBQUEUED + * is returned for AIO, so we 
can not call aio_complete() + * in our end_io(). + */ + if (rc != -EIOCBQUEUED) + io->ci_aio->cda_no_aio_complete = 1; /** * Drop one extra reference so that end_io() could be * called for this IO context, we could call it after diff --git a/lustre/obdclass/cl_io.c b/lustre/obdclass/cl_io.c index 705aa89..dc4f58f 100644 --- a/lustre/obdclass/cl_io.c +++ b/lustre/obdclass/cl_io.c @@ -1185,7 +1185,7 @@ static void cl_aio_end(const struct lu_env *env, struct cl_sync_io *anchor) cl_page_put(env, page); } - if (!is_sync_kiocb(aio->cda_iocb)) + if (!is_sync_kiocb(aio->cda_iocb) && !aio->cda_no_aio_complete) aio_complete(aio->cda_iocb, ret ?: aio->cda_bytes, 0); EXIT; @@ -1205,6 +1205,7 @@ struct cl_dio_aio *cl_aio_alloc(struct kiocb *iocb) NULL : aio, cl_aio_end); cl_page_list_init(&aio->cda_pages); aio->cda_iocb = iocb; + aio->cda_no_aio_complete = 0; } return aio; } diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 207d6b5..cd1b832 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -22048,6 +22048,11 @@ test_398d() { # LU-13846 aiocp -a $PAGE_SIZE -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file diff $DIR/$tfile $aio_file || "file diff after aiocp" + + # make sure we don't crash and fail properly + aiocp -a 512 -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file && + error "aio not aligned with PAGE SIZE should fail" + rm -rf $DIR/$tfile $aio_file } run_test 398d "run aiocp to verify block size > stripe size" -- 1.8.3.1