From 9e8b4e2fc2f0b3ad61b0fed9326580dad0389cbf Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Sat, 21 Mar 2020 09:58:09 +0800 Subject: [PATCH] LU-13377 llite: fix dead loop for short write |->vvp_io_write_start() |->__generic_file_write_iter() |->iov_iter_advance() if write succeeds |->vvp_io_write_commit() |->update ci_nob The problem is that the iov iter is moved forward inside __generic_file_write_iter(), but @ci_nob is only updated after vvp_io_write_commit(). If the commit runs out of quota or hits some other problem, this causes a mismatch between @ci_nob and @vui_iter. @vui_iter->count is then reset using @ci_nob in iov_iter_reexpand(), which makes @vui_iter->count larger than what is really left, and we can loop forever in vvp_mmap_locks() if the IO needs to be retried or restarted: vvp_io_write_lock+0x45/0x80 [lustre] cl_io_lock+0x5f/0x3d0 [obdclass] cl_io_loop+0x92/0x190 [obdclass] ll_file_io_generic+0x7b3/0xc90 [lustre] ll_file_aio_write+0x12d/0x1f0 [lustre] ll_file_write+0xce/0x1e0 [lustre] vfs_write+0xc0/0x1f0 SyS_write+0x7f/0xf0 system_call_fastpath+0x22/0x27 Lustre-change: https://review.whamcloud.com/38018 Lustre-commit: 13dfe0df4956afb50b323a11615b0b34ed014e53 Change-Id: I5fb4c18cf02fb17bf50122b63decacef678caa01 Signed-off-by: Wang Shilong Reviewed-by: Andreas Dilger Reviewed-by: Bobi Jam Reviewed-on: https://review.whamcloud.com/38163 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/llite/vvp_io.c | 20 +++++++++++++++++++- lustre/tests/sanity.sh | 12 ++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index a6e9c64..80d87fb 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -571,6 +571,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LLITE_IMUTEX_SEC 0x140e #define OBD_FAIL_LLITE_IMUTEX_NOSEC 0x140f #define OBD_FAIL_LLITE_OPEN_BY_NAME 0x1410 +#define OBD_FAIL_LLITE_SHORT_COMMIT 0x1415 #define 
OBD_FAIL_FID_INDIR 0x1501 #define OBD_FAIL_FID_INLMA 0x1502 diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index 5beb5c5..5a08d5d 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -1031,6 +1031,7 @@ static int vvp_io_write_start(const struct lu_env *env, size_t cnt = io->u.ci_wr.wr.crw_count; bool lock_inode = !IS_NOSEC(inode); size_t nob = io->ci_nob; + struct iov_iter iter; size_t written = 0; ENTRY; @@ -1093,6 +1094,7 @@ static int vvp_io_write_start(const struct lu_env *env, * trucates, etc. is handled in the higher layers of lustre. */ lock_inode = !IS_NOSEC(inode); + iter = *vio->vui_iter; if (unlikely(lock_inode)) inode_lock(inode); @@ -1119,12 +1121,20 @@ static int vvp_io_write_start(const struct lu_env *env, if (result > 0) { result = vvp_io_write_commit(env, io); + /* Simulate short commit */ + if (CFS_FAULT_CHECK(OBD_FAIL_LLITE_SHORT_COMMIT)) { + vio->u.write.vui_written >>= 1; + if (vio->u.write.vui_written > 0) + io->ci_need_restart = 1; + } if (vio->u.write.vui_written > 0) { result = vio->u.write.vui_written; CDEBUG(D_VFSTRACE, "%s: write nob %zd, result: %zd\n", file_dentry(file)->d_name.name, io->ci_nob, result); io->ci_nob += result; + } else { + io->ci_continue = 0; } } if (vio->vui_iocb->ki_pos != (pos + io->ci_nob - nob)) { @@ -1134,8 +1144,16 @@ static int vvp_io_write_start(const struct lu_env *env, file_dentry(file)->d_name.name, vio->vui_iocb->ki_pos, pos + io->ci_nob - nob, written, io->ci_nob - nob, result); - /* rewind ki_pos to where it has successfully committed */ + /* + * Rewind ki_pos and vui_iter to where it has + * successfully committed. 
+ */ vio->vui_iocb->ki_pos = pos + io->ci_nob - nob; + iov_iter_advance(&iter, io->ci_nob - nob); + vio->vui_iter->iov = iter.iov; + vio->vui_iter->nr_segs = iter.nr_segs; + vio->vui_iter->iov_offset = iter.iov_offset; + vio->vui_iter->count = iter.count; } if (result > 0) { ll_file_set_flag(ll_i2info(inode), LLIF_DATA_MODIFIED); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index c9d56d1..79e3f56 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -21463,6 +21463,18 @@ test_900() { } run_test 900 "umount should not race with any mgc requeue thread" +# LU-13377 +test_902() { + [ $CLIENT_VERSION -lt $(version_code 2.13.52) ] && + skip "client does not have LU-13377 fix" + #define OBD_FAIL_LLITE_SHORT_COMMIT 0x1415 + $LCTL set_param fail_loc=0x1415 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 + cancel_lru_locks osc + rm -f $DIR/$tfile +} +run_test 902 "test short write doesn't hang lustre" + complete $SECONDS [ -f $EXT2_DEV ] && rm $EXT2_DEV || true check_and_cleanup_lustre -- 1.8.3.1