Whamcloud - gitweb
LU-16025 llite: adjust read count as file got truncated 96/47896/21
author Bobi Jam <bobijam@whamcloud.com>
Thu, 7 Jul 2022 07:38:54 +0000 (15:38 +0800)
committer Oleg Drokin <green@whamcloud.com>
Tue, 25 Oct 2022 17:21:40 +0000 (17:21 +0000)
A file read does not notice when the file has been truncated by another
node, and continues to read zero-filled pages beyond the new file size.

This patch adds a confinement to the read path to prevent the issue and
adds a test case verifying the fix.

Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Change-Id: Ie51ba09201a1ca1464c3a3892d367590e978ee34
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/47896
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Patrick Farrell <farr0186@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Sebastien Buisson <sbuisson@ddn.com>
lustre/llite/file.c
lustre/llite/glimpse.c
lustre/lov/lov_cl_internal.h
lustre/lov/lov_object.c
lustre/tests/sanityn.sh

index 5842e9f..86e55f4 100644 (file)
@@ -1939,6 +1939,59 @@ ll_do_fast_read(struct kiocb *iocb, struct iov_iter *iter)
        return result;
 }
 
+/**
+ * Confine read iter lest read beyond the EOF
+ *
+ * Reads the object attributes under the attr lock. When kms
+ * (attr->cat_kms) is non-zero and the requested range starts at or
+ * extends past it, the file size is refreshed with ll_glimpse_size()
+ * and, if the range also extends past i_size, @to is truncated so that
+ * the read ends at i_size. No check is performed when kms is 0.
+ *
+ * \param env [in]     lu_env used for the attribute lookup
+ * \param iocb [in]    kernel iocb
+ * \param to [in,out]  reader iov_iter, may be truncated to end at file size
+ *
+ * \retval <0  failure (from cl_object_attr_get() or ll_glimpse_size())
+ * \retval 0   success, @to possibly shortened
+ * \retval >0  @iocb->ki_pos is at or has passed the EOF
+ */
+static int file_read_confine_iter(struct lu_env *env, struct kiocb *iocb,
+                                 struct iov_iter *to)
+{
+       struct cl_attr *attr = vvp_env_thread_attr(env);
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+       struct ll_inode_info *lli = ll_i2info(inode);
+       loff_t read_end = iocb->ki_pos + iov_iter_count(to);
+       loff_t kms;
+       loff_t size;
+       int rc;
+
+       cl_object_attr_lock(lli->lli_clob);
+       rc = cl_object_attr_get(env, lli->lli_clob, attr);
+       cl_object_attr_unlock(lli->lli_clob);
+       if (rc != 0)
+               return rc;
+
+       kms = attr->cat_kms;
+       /* read reaches kms or beyond: glimpse the size, adjust read count */
+       if (kms > 0 && (iocb->ki_pos >= kms || read_end > kms)) {
+               rc = ll_glimpse_size(inode);
+               if (rc != 0)
+                       return rc;
+
+               size = i_size_read(inode);
+               if (iocb->ki_pos >= size || read_end > size) {
+                       CDEBUG(D_VFSTRACE,
+                              "%s: read [%llu, %llu] over eof, kms %llu, file_size %llu.\n",
+                              file_dentry(file)->d_name.name,
+                              iocb->ki_pos, read_end, kms, size);
+
+                       if (iocb->ki_pos >= size)
+                               return 1;
+
+                       if (read_end > size)
+                               iov_iter_truncate(to, size - iocb->ki_pos);
+               }
+       }
+
+       return rc;
+}
+
 /*
  * Read from a file (through the page cache).
  */
@@ -1952,6 +2005,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        __u16 refcheck;
        ktime_t kstart = ktime_get();
        bool cached;
+       bool stale_data = false;
 
        ENTRY;
 
@@ -1963,6 +2017,16 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        if (!iov_iter_count(to))
                RETURN(0);
 
+       env = cl_env_get(&refcheck);
+       if (IS_ERR(env))
+               RETURN(PTR_ERR(env));
+
+       result = file_read_confine_iter(env, iocb, to);
+       if (result < 0)
+               GOTO(out, result);
+       else if (result > 0)
+               stale_data = true;
+
        /**
         * Currently when PCC read failed, we do not fall back to the
         * normal read path, just return the error.
@@ -1984,10 +2048,6 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        if (result < 0 || iov_iter_count(to) == 0)
                GOTO(out, result);
 
-       env = cl_env_get(&refcheck);
-       if (IS_ERR(env))
-               RETURN(PTR_ERR(env));
-
        args = ll_env_args(env);
        args->u.normal.via_iter = to;
        args->u.normal.via_iocb = iocb;
@@ -1999,8 +2059,18 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        else if (result == 0)
                result = rc2;
 
-       cl_env_put(env, &refcheck);
 out:
+       cl_env_put(env, &refcheck);
+
+       if (stale_data && result > 0) {
+               /**
+                * we've reached EOF before the read, the data read are cached
+                * stale data.
+                */
+               iov_iter_truncate(to, 0);
+               result = 0;
+       }
+
        if (result > 0) {
                ll_rw_stats_tally(ll_i2sbi(file_inode(file)), current->pid,
                                  file->private_data, iocb->ki_pos, result,
index 2a677a4..bd5e6b6 100644 (file)
@@ -210,7 +210,12 @@ int cl_glimpse_size0(struct inode *inode, int agl)
                } else if (result == 0) {
                        result = cl_glimpse_lock(env, io, inode, io->ci_obj,
                                                 agl);
-                       if (!agl && result == -EAGAIN)
+                       /**
+                        * need to limit retries for FLR mirrors if fast read
+                        * is short because of concurrent truncate.
+                        */
+                       if (!agl && result == -EAGAIN &&
+                           !io->ci_tried_all_mirrors)
                                io->ci_need_restart = 1;
                }
 
index 75d7ff3..36100a0 100644 (file)
@@ -375,8 +375,10 @@ static inline struct lov_layout_entry *lov_entry(struct lov_object *lov, int i)
 }
 
 #define lov_for_layout_entry(lov, entry, start, end)                   \
-       for (entry = lov_entry(lov, start);                             \
-            entry <= lov_entry(lov, end); entry++)
+       if (lov->u.composite.lo_entries &&                              \
+           lov->u.composite.lo_entry_count > 0)                        \
+               for (entry = lov_entry(lov, start);                     \
+                    entry <= lov_entry(lov, end); entry++)
 
 #define lov_foreach_layout_entry(lov, entry)                           \
        lov_for_layout_entry(lov, entry, 0,                             \
index ed35015..8051878 100644 (file)
@@ -848,19 +848,17 @@ static int lov_delete_composite(const struct lu_env *env,
                                union lov_layout_state *state)
 {
        struct lov_layout_entry *entry;
-       struct lov_layout_composite *comp = &state->composite;
 
        ENTRY;
 
        dump_lsm(D_INODE, lov->lo_lsm);
 
        lov_layout_wait(env, lov);
-       if (comp->lo_entries)
-               lov_foreach_layout_entry(lov, entry) {
-                       if (entry->lle_lsme && lsme_is_foreign(entry->lle_lsme))
-                               continue;
+       lov_foreach_layout_entry(lov, entry) {
+               if (entry->lle_lsme && lsme_is_foreign(entry->lle_lsme))
+                       continue;
 
-                       lov_delete_raid0(env, lov, entry);
+               lov_delete_raid0(env, lov, entry);
        }
 
        RETURN(0);
@@ -1009,6 +1007,8 @@ static int lov_attr_get_composite(const struct lu_env *env,
 
        attr->cat_size = 0;
        attr->cat_blocks = 0;
+       attr->cat_kms = 0;
+
        lov_foreach_layout_entry(lov, entry) {
                struct cl_attr *lov_attr = NULL;
                int index = lov_layout_entry_index(lov, entry);
index ed2cc2a..ae75881 100755 (executable)
@@ -631,6 +631,51 @@ test_16h() {
 }
 run_test 16h "mmap read after truncate file"
 
+test_16i() {
+       local tf=$DIR/$tdir/$tfile
+       local tf2=$DIR2/$tdir/$tfile
+
+       test_mkdir $DIR/$tdir
+
+       # create file and populate data through the first mount ($DIR)
+       cp /etc/passwd $tf || error "cp failed"
+
+       local size=$(stat -c %s $tf)
+
+       c1=$(dd if=$tf bs=1 2>/dev/null | od -x | tail -q -n4)
+       c2=$(dd if=$tf2 bs=1 2>/dev/null | od -x | tail -q -n4)
+
+       if [[ "$c1" != "$c2" ]]; then
+               echo "  ------- mount 1 read --------"
+               echo $c1
+               echo "  ------- mount 2 read --------"
+               echo $c2
+               error "content mismatch"
+       fi
+
+       echo "  ------- before truncate --------"
+       echo $c1
+
+       # truncate file to half its original size through the first mount
+       $TRUNCATE $tf $((size / 2)) || error "truncate file"
+
+       echo "  ------- after truncate --------"
+
+       # repeat the comparison: both mounts must still read identical data
+       c1=$(dd if=$tf bs=1 2>/dev/null | od -x | tail -q -n4)
+       c2=$(dd if=$tf2 bs=1 2>/dev/null | od -x | tail -q -n4)
+
+       if [[ "$c1" != "$c2" ]]; then
+               echo "  ------- mount 1 read --------"
+               echo $c1
+               echo "  ------- mount 2 read --------"
+               echo $c2
+               error "content mismatch after truncate"
+       fi
+       echo $c2
+}
+run_test 16i "read after truncate file"
+
 test_17() { # bug 3513, 3667
        remote_ost_nodsh && skip "remote OST with nodsh" && return