From: Wang Shilong Date: Sat, 24 Oct 2020 01:47:23 +0000 (+0800) Subject: LU-14072 llite: fix client eviction with DIO X-Git-Tag: 2.13.57~35 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=f348437218d0b9f85f1444c219002325338a3277 LU-14072 llite: fix client eviction with DIO We set the lockless flag at file open if the O_DIRECT flag is passed; however, the O_DIRECT flag can later be cleared by fcntl(..., F_SETFL, ...). We then end up doing buffered IO without the lock held properly, and hit a hang: [] osc_extent_wait+0x21d/0x7c0 [osc] [] osc_cache_wait_range+0x2e7/0x940 [osc] [] osc_cache_writeback_range+0x96e/0xff0 [osc] [] osc_lock_flush+0x195/0x290 [osc] [] osc_lock_lockless_cancel+0x3c/0xe0 [osc] [] cl_lock_cancel+0x78/0x160 [obdclass] [] lov_lock_cancel+0x99/0x190 [lov] [] cl_lock_cancel+0x78/0x160 [obdclass] [] cl_lock_release+0x52/0x140 [obdclass] [] cl_io_unlock+0x139/0x290 [obdclass] [] cl_io_loop+0xb8/0x200 [obdclass] [] ll_file_io_generic+0x91b/0xdf0 [lustre] [] ll_file_aio_write+0x29c/0x6e0 [lustre] [] ll_file_write+0x100/0x1c0 [lustre] [] vfs_write+0xc0/0x1f0 [] SyS_write+0x7f/0xf0 [] system_call_fastpath+0x25/0x2a [] 0xffffffffffffffff The lock cancel then times out on the server side and the client is evicted. Fix this problem by testing the O_DIRECT flag to decide whether we can issue lockless IO. Fixes: 6bce536725 ("LU-4198 clio: turn on lockless for some kind of IO") Change-Id: Idbf1c748684a6540aee5f6e35c017929fbcc60b9 Signed-off-by: Wang Shilong Reviewed-on: https://review.whamcloud.com/40389 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Gu Zheng Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index ad3512c..c0f0840 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -1952,7 +1952,7 @@ struct cl_io { /** * Ignore lockless and do normal locking for this io. 
*/ - ci_ignore_lockless:1, + ci_dio_lock:1, /** * Set if we've tried all mirrors for this read IO, if it's not set, * the read IO will check to-be-read OSCs' status, and make fast-switch diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 62d5746..40232fd 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -688,7 +688,6 @@ struct fsxattr { #define LL_FILE_GROUP_LOCKED 0x00000002 #define LL_FILE_READAHEA 0x00000004 #define LL_FILE_LOCKED_DIRECTIO 0x00000008 /* client-side locks with dio */ -#define LL_FILE_LOCKLESS_IO 0x00000010 /* server-side locks with cio */ #define LL_FILE_FLOCK_WARNING 0x00000020 /* warned about disabled flock */ #define LOV_USER_MAGIC_V1 0x0BD10BD0 diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 675a5a1..a048204 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -930,9 +930,6 @@ restart: mutex_unlock(&lli->lli_och_mutex); - /* lockless for direct IO so that it can do IO in parallel */ - if (file->f_flags & O_DIRECT) - fd->fd_flags |= LL_FILE_LOCKLESS_IO; fd = NULL; /* Must do this outside lli_och_mutex lock to prevent deadlock where @@ -1548,7 +1545,7 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, struct cl_io *io; ssize_t result = 0; int rc = 0; - unsigned int retried = 0, ignore_lockless = 0; + unsigned int retried = 0, dio_lock = 0; bool is_aio = false; struct cl_dio_aio *ci_aio = NULL; @@ -1571,7 +1568,7 @@ restart: io = vvp_env_thread_io(env); ll_io_init(io, file, iot, args); io->ci_aio = ci_aio; - io->ci_ignore_lockless = ignore_lockless; + io->ci_dio_lock = dio_lock; io->ci_ndelay_tried = retried; if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) { @@ -1650,7 +1647,7 @@ out: *ppos, count, result, rc); /* preserve the tried count for FLR */ retried = io->ci_ndelay_tried; - ignore_lockless = io->ci_ignore_lockless; + dio_lock = io->ci_dio_lock; goto restart; } 
diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 0cee30b..0cc784d 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -1714,9 +1714,9 @@ int ll_readpage(struct file *file, struct page *vmpage) */ if (file->f_flags & O_DIRECT && lcc && lcc->lcc_type == LCC_RW && - !io->ci_ignore_lockless) { + !io->ci_dio_lock) { unlock_page(vmpage); - io->ci_ignore_lockless = 1; + io->ci_dio_lock = 1; io->ci_need_restart = 1; RETURN(-ENOLCK); } diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 65024f2..1415d79 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -679,12 +679,12 @@ static int ll_write_begin(struct file *file, struct address_space *mapping, GOTO(out, result = -EBUSY); /** - * Direct read can fall back to buffered read, but DIO is done + * Direct write can fall back to buffered read, but DIO is done * with lockless i/o, and buffered requires LDLM locking, so * in this case we must restart without lockless. */ - if (!io->ci_ignore_lockless) { - io->ci_ignore_lockless = 1; + if (!io->ci_dio_lock) { + io->ci_dio_lock = 1; io->ci_need_restart = 1; GOTO(out, result = -ENOLCK); } diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index ba6f63a..7099633 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -568,11 +568,11 @@ static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io, if (vio->vui_fd) { /* Group lock held means no lockless any more */ if (vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED) - io->ci_ignore_lockless = 1; + io->ci_dio_lock = 1; if (ll_file_nolock(vio->vui_fd->fd_file) || - (vio->vui_fd->fd_flags & LL_FILE_LOCKLESS_IO && - !io->ci_ignore_lockless)) + (vio->vui_fd->fd_file->f_flags & O_DIRECT && + !io->ci_dio_lock)) ast_flags |= CEF_NEVER; } diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index b2f5e38..e01bdc8 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -22370,6 +22370,13 @@ test_398d() { # LU-13846 } run_test 398d "run aiocp to verify block size > 
stripe size" +test_398e() { + dd if=/dev/zero of=$DIR/$tfile bs=1234 count=1 + touch $DIR/$tfile.new + dd if=$DIR/$tfile of=$DIR/$tfile.new bs=1M count=1 oflag=direct +} +run_test 398e "O_Direct open cleared by fcntl doesn't cause hang" + test_fake_rw() { local read_write=$1 if [ "$read_write" = "write" ]; then