Whamcloud - gitweb
LU-14072 llite: fix client eviction with DIO 89/40389/3
author Wang Shilong <wshilong@ddn.com>
Sat, 24 Oct 2020 01:47:23 +0000 (09:47 +0800)
committer Oleg Drokin <green@whamcloud.com>
Thu, 26 Nov 2020 23:57:02 +0000 (23:57 +0000)
We enable lockless IO at file open if the O_DIRECT flag is
passed; however, the O_DIRECT flag can later be cleared by
fcntl(..., F_SETFL, ...).

Eventually we reach a case where buffered IO is done without
the lock held properly, and it hangs:

[<ffffffffc0d421ed>] osc_extent_wait+0x21d/0x7c0 [osc]
[<ffffffffc0d44897>] osc_cache_wait_range+0x2e7/0x940 [osc]
[<ffffffffc0d4585e>] osc_cache_writeback_range+0x96e/0xff0 [osc]
[<ffffffffc0d31c45>] osc_lock_flush+0x195/0x290 [osc]
[<ffffffffc0d31d7c>] osc_lock_lockless_cancel+0x3c/0xe0 [osc]
[<ffffffffc081f488>] cl_lock_cancel+0x78/0x160 [obdclass]
[<ffffffffc0cd8079>] lov_lock_cancel+0x99/0x190 [lov]
[<ffffffffc081f488>] cl_lock_cancel+0x78/0x160 [obdclass]
[<ffffffffc081f9a2>] cl_lock_release+0x52/0x140 [obdclass]
[<ffffffffc08238a9>] cl_io_unlock+0x139/0x290 [obdclass]
[<ffffffffc08242e8>] cl_io_loop+0xb8/0x200 [obdclass]
[<ffffffffc0e1d36b>] ll_file_io_generic+0x91b/0xdf0 [lustre]
[<ffffffffc0e1dd0c>] ll_file_aio_write+0x29c/0x6e0 [lustre]
[<ffffffffc0e1e250>] ll_file_write+0x100/0x1c0 [lustre]
[<ffffffffa984aa90>] vfs_write+0xc0/0x1f0
[<ffffffffa984b8af>] SyS_write+0x7f/0xf0
[<ffffffffa9d8eede>] system_call_fastpath+0x25/0x2a
[<ffffffffffffffff>] 0xffffffffffffffff

The lock cancel times out on the server side and the client
is evicted.

Fix this problem by testing O_DIRECT flag to decide if
we could issue lockless IO.

Fixes: 6bce536725 ("LU-4198 clio: turn on lockless for some kind of IO")
Change-Id: Idbf1c748684a6540aee5f6e35c017929fbcc60b9
Signed-off-by: Wang Shilong <wshilong@ddn.com>
Reviewed-on: https://review.whamcloud.com/40389
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Gu Zheng <gzheng@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/cl_object.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/llite/file.c
lustre/llite/rw.c
lustre/llite/rw26.c
lustre/llite/vvp_io.c
lustre/tests/sanity.sh

index ad3512c..c0f0840 100644 (file)
@@ -1952,7 +1952,7 @@ struct cl_io {
        /**
         * Ignore lockless and do normal locking for this io.
         */
-                            ci_ignore_lockless:1,
+                            ci_dio_lock:1,
        /**
         * Set if we've tried all mirrors for this read IO, if it's not set,
         * the read IO will check to-be-read OSCs' status, and make fast-switch
index 62d5746..40232fd 100644 (file)
@@ -688,7 +688,6 @@ struct fsxattr {
 #define LL_FILE_GROUP_LOCKED    0x00000002
 #define LL_FILE_READAHEA        0x00000004
 #define LL_FILE_LOCKED_DIRECTIO 0x00000008 /* client-side locks with dio */
-#define LL_FILE_LOCKLESS_IO     0x00000010 /* server-side locks with cio */
 #define LL_FILE_FLOCK_WARNING   0x00000020 /* warned about disabled flock */
 
 #define LOV_USER_MAGIC_V1      0x0BD10BD0
index 675a5a1..a048204 100644 (file)
@@ -930,9 +930,6 @@ restart:
 
        mutex_unlock(&lli->lli_och_mutex);
 
-       /* lockless for direct IO so that it can do IO in parallel */
-       if (file->f_flags & O_DIRECT)
-               fd->fd_flags |= LL_FILE_LOCKLESS_IO;
        fd = NULL;
 
        /* Must do this outside lli_och_mutex lock to prevent deadlock where
@@ -1548,7 +1545,7 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
        struct cl_io *io;
        ssize_t result = 0;
        int rc = 0;
-       unsigned int retried = 0, ignore_lockless = 0;
+       unsigned int retried = 0, dio_lock = 0;
        bool is_aio = false;
        struct cl_dio_aio *ci_aio = NULL;
 
@@ -1571,7 +1568,7 @@ restart:
        io = vvp_env_thread_io(env);
        ll_io_init(io, file, iot, args);
        io->ci_aio = ci_aio;
-       io->ci_ignore_lockless = ignore_lockless;
+       io->ci_dio_lock = dio_lock;
        io->ci_ndelay_tried = retried;
 
        if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
@@ -1650,7 +1647,7 @@ out:
                       *ppos, count, result, rc);
                /* preserve the tried count for FLR */
                retried = io->ci_ndelay_tried;
-               ignore_lockless = io->ci_ignore_lockless;
+               dio_lock = io->ci_dio_lock;
                goto restart;
        }
 
index 0cee30b..0cc784d 100644 (file)
@@ -1714,9 +1714,9 @@ int ll_readpage(struct file *file, struct page *vmpage)
         */
        if (file->f_flags & O_DIRECT &&
            lcc && lcc->lcc_type == LCC_RW &&
-           !io->ci_ignore_lockless) {
+           !io->ci_dio_lock) {
                unlock_page(vmpage);
-               io->ci_ignore_lockless = 1;
+               io->ci_dio_lock = 1;
                io->ci_need_restart = 1;
                RETURN(-ENOLCK);
        }
index 65024f2..1415d79 100644 (file)
@@ -679,12 +679,12 @@ static int ll_write_begin(struct file *file, struct address_space *mapping,
                        GOTO(out, result = -EBUSY);
 
                /**
-                * Direct read can fall back to buffered read, but DIO is done
+                * Direct write can fall back to buffered write, but DIO is done
                 * with lockless i/o, and buffered requires LDLM locking, so
                 * in this case we must restart without lockless.
                 */
-               if (!io->ci_ignore_lockless) {
-                       io->ci_ignore_lockless = 1;
+               if (!io->ci_dio_lock) {
+                       io->ci_dio_lock = 1;
                        io->ci_need_restart = 1;
                        GOTO(out, result = -ENOLCK);
                }
index ba6f63a..7099633 100644 (file)
@@ -568,11 +568,11 @@ static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io,
        if (vio->vui_fd) {
                /* Group lock held means no lockless any more */
                if (vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)
-                       io->ci_ignore_lockless = 1;
+                       io->ci_dio_lock = 1;
 
                if (ll_file_nolock(vio->vui_fd->fd_file) ||
-                   (vio->vui_fd->fd_flags & LL_FILE_LOCKLESS_IO &&
-                    !io->ci_ignore_lockless))
+                   (vio->vui_fd->fd_file->f_flags & O_DIRECT &&
+                    !io->ci_dio_lock))
                        ast_flags |= CEF_NEVER;
        }
 
index b2f5e38..e01bdc8 100755 (executable)
@@ -22370,6 +22370,13 @@ test_398d() { #  LU-13846
 }
 run_test 398d "run aiocp to verify block size > stripe size"
 
+test_398e() {
+       dd if=/dev/zero of=$DIR/$tfile bs=1234 count=1
+       touch $DIR/$tfile.new
+       dd if=$DIR/$tfile of=$DIR/$tfile.new bs=1M count=1 oflag=direct
+}
+run_test 398e "O_Direct open cleared by fcntl doesn't cause hang"
+
 test_fake_rw() {
        local read_write=$1
        if [ "$read_write" = "write" ]; then