Whamcloud - gitweb
LU-12382 llite: fix deadloop with tiny write 12/35312/2
authorWang Shilong <wshilong@ddn.com>
Tue, 4 Jun 2019 12:54:01 +0000 (20:54 +0800)
committerOleg Drokin <green@whamcloud.com>
Wed, 3 Jul 2019 03:23:33 +0000 (03:23 +0000)
For a small write(<4K), we will use tiny write and
__generic_file_write_iter() will be called to handle it.

On newer kernel(4.14 etc), the function is exported and will
do something like following:

|->__generic_file_write_iter
  |->generic_perform_write()

If iov_iter_count() passed in is 0, generic_write_perform() will
try go to forever loop as bytes copied is always calculated as 0.

The problem is VFS doesn't always skip IO count zero before it comes
to lower layer read/write hook, and we should do it by ourselves.

To fix this problem, always return 0 early if there is no
real any IO needed.

Lustre-change: https://review.whamcloud.com/35058
Lustre-commit: e9a543b0d3039027423cb469525015f97caa3a3f

Change-Id: I765a723da79eb5fd09317c3fad47fe479b1dd4fb
Signed-off-by: Wang Shilong <wshilong@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Li Xi <lixi@ddn.com>
Reviewed-by: Patrick Farrell <pfarrell@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/35312
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
lustre/llite/file.c
lustre/tests/sanity.sh
lustre/tests/writeme.c

index ad5d115..df56a57 100644 (file)
@@ -1579,6 +1579,9 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        ssize_t rc2;
        __u16 refcheck;
 
+       if (!iov_iter_count(to))
+               return 0;
+
        result = ll_do_fast_read(iocb, to);
        if (result < 0 || iov_iter_count(to) == 0)
                GOTO(out, result);
@@ -1673,6 +1676,9 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
        ENTRY;
 
+       if (!iov_iter_count(from))
+               GOTO(out, rc_normal = 0);
+
        /* NB: we can't do direct IO for tiny writes because they use the page
         * cache, we can't do sync writes because tiny writes can't flush
         * pages, and we can't do append writes because we can't guarantee the
@@ -1757,6 +1763,9 @@ static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
        if (result)
                RETURN(result);
 
+       if (!iov_count)
+               RETURN(0);
+
 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
        iov_iter_init(&to, READ, iov, nr_segs, iov_count);
 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
@@ -1774,8 +1783,12 @@ static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
        struct iovec   iov = { .iov_base = buf, .iov_len = count };
        struct kiocb   kiocb;
        ssize_t        result;
+
        ENTRY;
 
+       if (!count)
+               RETURN(0);
+
        init_sync_kiocb(&kiocb, file);
        kiocb.ki_pos = *ppos;
 #ifdef HAVE_KIOCB_KI_LEFT
@@ -1806,6 +1819,9 @@ static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        if (result)
                RETURN(result);
 
+       if (!iov_count)
+               RETURN(0);
+
 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
        iov_iter_init(&from, WRITE, iov, nr_segs, iov_count);
 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
@@ -1827,6 +1843,9 @@ static ssize_t ll_file_write(struct file *file, const char __user *buf,
 
        ENTRY;
 
+       if (!count)
+               RETURN(0);
+
        init_sync_kiocb(&kiocb, file);
        kiocb.ki_pos = *ppos;
 #ifdef HAVE_KIOCB_KI_LEFT
index 386b668..de82fe9 100755 (executable)
@@ -20322,6 +20322,13 @@ test_814()
 }
 run_test 814 "sparse cp works as expected (LU-12361)"
 
+test_815()
+{
+       writeme -b 100 $DIR/$tfile || error "write 100 bytes failed"
+       writeme -b 0 $DIR/$tfile || error "write 0 byte failed"
+}
+run_test 815 "zero byte tiny write doesn't hang (LU-12382)"
+
 #
 # tests that do cleanup/setup should be run at the end
 #
index f059940..38c7c37 100644 (file)
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+#include <stdbool.h>
 
 void usage(char *prog)
 {
-        printf("usage: %s [-s] filename\n", prog);
+       printf("usage: %s [-s] [-b <bytes>] filename\n", prog);
+       exit(1);
 }
 
 int main(int argc, char **argv)
 {
-        int fd, rc;
-        int do_sync = 0;
-        int file_arg = 1;
-        char buf[4096];
+       bool limit_write = false, do_sync = false;
+       int c, per_write, fd, rc;
+       unsigned long bytes = 0;
+       char buf[4096];
+       char *endptr = NULL;
 
-        memset(buf, 0, 4096);
+       while ((c = getopt(argc, argv, "sb:")) != -1) {
+               switch (c) {
+               case 's':
+                       do_sync = true;
+                       break;
+               case 'b':
+                       limit_write = true;
+                       bytes = strtoul(optarg, &endptr, 10);
+                       if (endptr != NULL && *endptr != '\0')
+                               usage(argv[0]);
+                       break;
+               default:
+                       usage(argv[0]);
+                       break;
+               }
+       }
 
-        if (argc < 2 || argc > 3) {
+       if (argc - optind != 1)
                usage(argv[0]);
-                exit(1);
-        }
 
-        if (strcmp(argv[1], "-s") == 0) {
-                do_sync = 1;
-               file_arg++;
-        }
+       memset(buf, 0, 4096);
+       fd = open(argv[optind], O_RDWR | O_CREAT, 0600);
+       if (fd == -1) {
+               printf("Error opening %s\n", argv[1]);
+               exit(1);
+       }
 
-        fd = open(argv[file_arg], O_RDWR | O_CREAT, 0600);
-        if (fd == -1) {
-                printf("Error opening %s\n", argv[1]);
-                exit(1);
-        }
+       /* Even 0 bytes, write at least once */
+       if (limit_write) {
+               do {
+                       per_write = bytes > 4096 ? 4096 : bytes;
+                       rc = write(fd, buf, per_write);
+                       if (rc > 0)
+                               bytes -= rc;
+                       else if (rc < 0)
+                               break;
+               } while (bytes > 0);
 
-        for (rc = 0; ;) {
-                sprintf(buf, "write %d\n", rc);
-                rc = write(fd, buf, sizeof(buf));
+               return rc >= 0 ? 0 : rc;
+       }
+
+       for (rc = 0; ;) {
+               sprintf(buf, "write %d\n", rc);
+               rc = write(fd, buf, sizeof(buf));
                if (do_sync)
                        sync();
-                sleep(1);
-        }
-        return 0;
+               sleep(1);
+       }
+
+       return 0;
 }