From 4468f6c9d92448cb72c5a616ec74653e83ee8e10 Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Thu, 7 Jul 2022 15:38:54 +0800 Subject: [PATCH] LU-16025 llite: adjust read count as file got truncated A file read does not notice that the file has been truncated by another node, and continues to read zero-filled pages beyond the new file size. This patch confines the read to the current file size to prevent the issue, and adds a test case verifying the fix. Signed-off-by: Bobi Jam Change-Id: Ie51ba09201a1ca1464c3a3892d367590e978ee34 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/47896 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Patrick Farrell Reviewed-by: Oleg Drokin Reviewed-by: Sebastien Buisson --- lustre/llite/file.c | 80 +++++++++++++++++++++++++++++++++++++++++--- lustre/llite/glimpse.c | 7 +++- lustre/lov/lov_cl_internal.h | 6 ++-- lustre/lov/lov_object.c | 12 +++---- lustre/tests/sanityn.sh | 45 +++++++++++++++++++++++++ 5 files changed, 136 insertions(+), 14 deletions(-) diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 5842e9f..86e55f4 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -1939,6 +1939,59 @@ ll_do_fast_read(struct kiocb *iocb, struct iov_iter *iter) return result; } +/** + * Confine read iter lest read beyond the EOF + * + * \param iocb [in] kernel iocb + * \param to [in] reader iov_iter + * + * \retval <0 failure + * \retval 0 success + * \retval >0 @iocb->ki_pos has passed the EOF + */ +static int file_read_confine_iter(struct lu_env *env, struct kiocb *iocb, + struct iov_iter *to) +{ + struct cl_attr *attr = vvp_env_thread_attr(env); + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + struct ll_inode_info *lli = ll_i2info(inode); + loff_t read_end = iocb->ki_pos + iov_iter_count(to); + loff_t kms; + loff_t size; + int rc; + + cl_object_attr_lock(lli->lli_clob); + rc = cl_object_attr_get(env, lli->lli_clob, attr); + cl_object_attr_unlock(lli->lli_clob); + if (rc != 0) + 
return rc; + + kms = attr->cat_kms; + /* if read beyond end-of-file, adjust read count */ + if (kms > 0 && (iocb->ki_pos >= kms || read_end > kms)) { + rc = ll_glimpse_size(inode); + if (rc != 0) + return rc; + + size = i_size_read(inode); + if (iocb->ki_pos >= size || read_end > size) { + CDEBUG(D_VFSTRACE, + "%s: read [%llu, %llu] over eof, kms %llu, file_size %llu.\n", + file_dentry(file)->d_name.name, + iocb->ki_pos, read_end, kms, size); + + if (iocb->ki_pos >= size) + return 1; + + if (read_end > size) + iov_iter_truncate(to, size - iocb->ki_pos); + } + } + + return rc; +} + /* * Read from a file (through the page cache). */ @@ -1952,6 +2005,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) __u16 refcheck; ktime_t kstart = ktime_get(); bool cached; + bool stale_data = false; ENTRY; @@ -1963,6 +2017,16 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (!iov_iter_count(to)) RETURN(0); + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + RETURN(PTR_ERR(env)); + + result = file_read_confine_iter(env, iocb, to); + if (result < 0) + GOTO(out, result); + else if (result > 0) + stale_data = true; + /** * Currently when PCC read failed, we do not fall back to the * normal read path, just return the error. @@ -1984,10 +2048,6 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (result < 0 || iov_iter_count(to) == 0) GOTO(out, result); - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - RETURN(PTR_ERR(env)); - args = ll_env_args(env); args->u.normal.via_iter = to; args->u.normal.via_iocb = iocb; @@ -1999,8 +2059,18 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) else if (result == 0) result = rc2; - cl_env_put(env, &refcheck); out: + cl_env_put(env, &refcheck); + + if (stale_data && result > 0) { + /** + * the read started at or beyond EOF, so any data read is stale + * cached data. 
+ */ + iov_iter_truncate(to, 0); + result = 0; + } + if (result > 0) { ll_rw_stats_tally(ll_i2sbi(file_inode(file)), current->pid, file->private_data, iocb->ki_pos, result, diff --git a/lustre/llite/glimpse.c b/lustre/llite/glimpse.c index 2a677a4..bd5e6b6 100644 --- a/lustre/llite/glimpse.c +++ b/lustre/llite/glimpse.c @@ -210,7 +210,12 @@ int cl_glimpse_size0(struct inode *inode, int agl) } else if (result == 0) { result = cl_glimpse_lock(env, io, inode, io->ci_obj, agl); - if (!agl && result == -EAGAIN) + /** + * need to limit retries for FLR mirrors if fast read + * is short because of concurrent truncate. + */ + if (!agl && result == -EAGAIN && + !io->ci_tried_all_mirrors) io->ci_need_restart = 1; } diff --git a/lustre/lov/lov_cl_internal.h b/lustre/lov/lov_cl_internal.h index 75d7ff3..36100a0 100644 --- a/lustre/lov/lov_cl_internal.h +++ b/lustre/lov/lov_cl_internal.h @@ -375,8 +375,10 @@ static inline struct lov_layout_entry *lov_entry(struct lov_object *lov, int i) } #define lov_for_layout_entry(lov, entry, start, end) \ - for (entry = lov_entry(lov, start); \ - entry <= lov_entry(lov, end); entry++) + if (lov->u.composite.lo_entries && \ + lov->u.composite.lo_entry_count > 0) \ + for (entry = lov_entry(lov, start); \ + entry <= lov_entry(lov, end); entry++) #define lov_foreach_layout_entry(lov, entry) \ lov_for_layout_entry(lov, entry, 0, \ diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index ed35015..8051878 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -848,19 +848,17 @@ static int lov_delete_composite(const struct lu_env *env, union lov_layout_state *state) { struct lov_layout_entry *entry; - struct lov_layout_composite *comp = &state->composite; ENTRY; dump_lsm(D_INODE, lov->lo_lsm); lov_layout_wait(env, lov); - if (comp->lo_entries) - lov_foreach_layout_entry(lov, entry) { - if (entry->lle_lsme && lsme_is_foreign(entry->lle_lsme)) - continue; + lov_foreach_layout_entry(lov, entry) { + if (entry->lle_lsme && 
lsme_is_foreign(entry->lle_lsme)) + continue; - lov_delete_raid0(env, lov, entry); + lov_delete_raid0(env, lov, entry); } RETURN(0); @@ -1009,6 +1007,8 @@ static int lov_attr_get_composite(const struct lu_env *env, attr->cat_size = 0; attr->cat_blocks = 0; + attr->cat_kms = 0; + lov_foreach_layout_entry(lov, entry) { struct cl_attr *lov_attr = NULL; int index = lov_layout_entry_index(lov, entry); diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index ed2cc2a..ae75881 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -631,6 +631,51 @@ test_16h() { } run_test 16h "mmap read after truncate file" +test_16i() { + local tf=$DIR/$tdir/$tfile + local tf2=$DIR2/$tdir/$tfile + + test_mkdir $DIR/$tdir + + # create file and populate data + cp /etc/passwd $tf || error "cp failed" + + local size=$(stat -c %s $tf) + + c1=$(dd if=$tf bs=1 2>/dev/null | od -x | tail -q -n4) + c2=$(dd if=$tf2 bs=1 2>/dev/null | od -x | tail -q -n4) + + if [[ "$c1" != "$c2" ]]; then + echo " ------- mount 1 read --------" + echo $c1 + echo " ------- mount 2 read --------" + echo $c2 + error "content mismatch" + fi + + echo " ------- before truncate --------" + echo $c1 + + # truncate file + $TRUNCATE $tf $((size / 2)) || error "truncate file" + + echo " ------- after truncate --------" + + # repeat the comparison + c1=$(dd if=$tf bs=1 2>/dev/null | od -x | tail -q -n4) + c2=$(dd if=$tf2 bs=1 2>/dev/null | od -x | tail -q -n4) + + if [[ "$c1" != "$c2" ]]; then + echo " ------- mount 1 read --------" + echo $c1 + echo " ------- mount 2 read --------" + echo $c2 + error "content mismatch after truncate" + fi + echo $c2 +} +run_test 16i "read after truncate file" + test_17() { # bug 3513, 3667 remote_ost_nodsh && skip "remote OST with nodsh" && return -- 1.8.3.1