Whamcloud - gitweb
LU-13377 llite: fix dead loop for short write 18/38018/6
authorWang Shilong <wshilong@ddn.com>
Sat, 21 Mar 2020 01:58:09 +0000 (09:58 +0800)
committerOleg Drokin <green@whamcloud.com>
Tue, 7 Apr 2020 17:21:25 +0000 (17:21 +0000)
|->vvp_io_write_start()
 |->__generic_file_write_iter()
    |->iov_iter_advance() if write succeed()
  |->vvp_io_write_commit()
     |->update ci_nob

The problem is we will move forward iov iter inside
__generic_file_write_iter(), but @ci_nob will be updated
after vvp_io_write_commit().

If out of quota or some other problems happen, this could
cause a mismatch with @ci_nob and @vui_iter.

And @vui_iter->count will be reset using @ci_nob in
iov_iter_reexpand(), this will make @vui_iter->count
more than what it really left, and we could dead loop
in vvp_mmap_locks() if IO need be retried or restarted:

vvp_io_write_lock+0x45/0x80 [lustre]
cl_io_lock+0x5f/0x3d0 [obdclass]
cl_io_loop+0x92/0x190 [obdclass]
ll_file_io_generic+0x7b3/0xc90 [lustre]
ll_file_aio_write+0x12d/0x1f0 [lustre]
ll_file_write+0xce/0x1e0 [lustre]
vfs_write+0xc0/0x1f0
SyS_write+0x7f/0xf0
system_call_fastpath+0x22/0x27

Change-Id: I5fb4c18cf02fb17bf50122b63decacef678caa01
Signed-off-by: Wang Shilong <wshilong@ddn.com>
Reviewed-on: https://review.whamcloud.com/38018
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
lustre/include/obd_support.h
lustre/llite/vvp_io.c
lustre/tests/sanity.sh

index 1efac3f..df40d05 100644 (file)
@@ -580,6 +580,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LLITE_PCC_DETACH_MKWRITE          0x1412
 #define OBD_FAIL_LLITE_PCC_MKWRITE_PAUSE           0x1413
 #define OBD_FAIL_LLITE_PCC_ATTACH_PAUSE                    0x1414
+#define OBD_FAIL_LLITE_SHORT_COMMIT                0x1415
 
 #define OBD_FAIL_FID_INDIR     0x1501
 #define OBD_FAIL_FID_INLMA     0x1502
index 94f9899..98bc985 100644 (file)
@@ -1181,6 +1181,7 @@ static int vvp_io_write_start(const struct lu_env *env,
        size_t                   cnt = io->u.ci_wr.wr.crw_count;
        bool                     lock_inode = !IS_NOSEC(inode);
        size_t nob = io->ci_nob;
+       struct iov_iter iter;
        size_t written = 0;
 
        ENTRY;
@@ -1243,6 +1244,7 @@ static int vvp_io_write_start(const struct lu_env *env,
                 * trucates, etc. is handled in the higher layers of lustre.
                 */
                lock_inode = !IS_NOSEC(inode);
+               iter = *vio->vui_iter;
 
                if (unlikely(lock_inode))
                        inode_lock(inode);
@@ -1269,12 +1271,20 @@ static int vvp_io_write_start(const struct lu_env *env,
 
        if (result > 0) {
                result = vvp_io_write_commit(env, io);
+               /* Simulate short commit */
+               if (CFS_FAULT_CHECK(OBD_FAIL_LLITE_SHORT_COMMIT)) {
+                       vio->u.write.vui_written >>= 1;
+                       if (vio->u.write.vui_written > 0)
+                               io->ci_need_restart = 1;
+               }
                if (vio->u.write.vui_written > 0) {
                        result = vio->u.write.vui_written;
                        CDEBUG(D_VFSTRACE, "%s: write nob %zd, result: %zd\n",
                                file_dentry(file)->d_name.name,
                                io->ci_nob, result);
                        io->ci_nob += result;
+               } else {
+                       io->ci_continue = 0;
                }
        }
        if (vio->vui_iocb->ki_pos != (pos + io->ci_nob - nob)) {
@@ -1284,8 +1294,16 @@ static int vvp_io_write_start(const struct lu_env *env,
                       file_dentry(file)->d_name.name,
                       vio->vui_iocb->ki_pos, pos + io->ci_nob - nob,
                       written, io->ci_nob - nob, result);
-               /* rewind ki_pos to where it has successfully committed */
+               /*
+                * Rewind ki_pos and vui_iter to where it has
+                * successfully committed.
+                */
                vio->vui_iocb->ki_pos = pos + io->ci_nob - nob;
+               iov_iter_advance(&iter, io->ci_nob - nob);
+               vio->vui_iter->iov = iter.iov;
+               vio->vui_iter->nr_segs = iter.nr_segs;
+               vio->vui_iter->iov_offset = iter.iov_offset;
+               vio->vui_iter->count = iter.count;
        }
        if (result > 0 || result == -EIOCBQUEUED) {
                ll_file_set_flag(ll_i2info(inode), LLIF_DATA_MODIFIED);
index e54435d..a073da0 100755 (executable)
@@ -23276,6 +23276,18 @@ test_901() {
 }
 run_test 901 "don't leak a mgc lock on client umount"
 
+# LU-13377
+test_902() {
+       [ $CLIENT_VERSION -lt $(version_code 2.13.52) ] &&
+               skip "client does not have LU-13377 fix"
+       #define OBD_FAIL_LLITE_SHORT_COMMIT 0x1415
+       $LCTL set_param fail_loc=0x1415
+       dd if=/dev/zero of=$DIR/$tfile bs=1M count=1
+       cancel_lru_locks osc
+       rm -f $DIR/$tfile
+}
+run_test 902 "test short write doesn't hang lustre"
+
 complete $SECONDS
 [ -f $EXT2_DEV ] && rm $EXT2_DEV || true
 check_and_cleanup_lustre