From 3141db609d95d379761e3b54899618b4037d38f6 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Wed, 24 Apr 2013 23:05:25 -0400 Subject: [PATCH] LU-3160 clio: don't ignore layout on writeback In some cases, such as kernel writeback, we shouldn't ignore the layout; otherwise it could race with an ongoing layout change. Test-Parameters: envdefinitions=DURATION=7200 clientdistro=el6 serverdistro=el6 clientcount=4 osscount=2 mdscount=2 austeroptions=-R failover=true useiscsi=true testlist=recovery-random-scale Signed-off-by: Niu Yawei Signed-off-by: Jinshan Xiong Change-Id: Ib9d0aa581de90711c92db4c631c52f1950ad5b67 Reviewed-on: http://review.whamcloud.com/6154 Tested-by: Hudson Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Fan Yong Reviewed-by: Oleg Drokin --- lustre/llite/file.c | 6 +++--- lustre/llite/llite_internal.h | 3 ++- lustre/llite/llite_lib.c | 12 +++++++----- lustre/llite/rw.c | 11 +++++++++-- lustre/llite/vvp_io.c | 13 ++++++++++++- 5 files changed, 33 insertions(+), 12 deletions(-) diff --git a/lustre/llite/file.c b/lustre/llite/file.c index e61b696..c3dade8 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -2374,7 +2374,7 @@ int ll_flush(struct file *file, fl_owner_t id) * Return how many pages have been written. 
*/ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, - enum cl_fsync_mode mode) + enum cl_fsync_mode mode, int ignore_layout) { struct cl_env_nest nest; struct lu_env *env; @@ -2396,7 +2396,7 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, io = ccc_env_thread_io(env); io->ci_obj = cl_i2info(inode)->lli_clob; - io->ci_ignore_layout = 1; + io->ci_ignore_layout = ignore_layout; /* initialize parameters for sync */ fio = &io->u.ci_fsync; @@ -2484,7 +2484,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int datasync) struct ll_file_data *fd = LUSTRE_FPRIVATE(file); err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, - CL_FSYNC_ALL); + CL_FSYNC_ALL, 0); if (rc == 0 && err < 0) rc = err; if (rc < 0) diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index cc7fc04..0d51446 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -465,6 +465,7 @@ struct ll_sb_info { struct lu_fid ll_root_fid; /* root object fid */ int ll_flags; + int ll_umounting:1; cfs_list_t ll_conn_chain; /* per-conn chain of SBs */ struct lustre_client_ocd ll_lco; @@ -1471,7 +1472,7 @@ static inline int cl_merge_lvb(const struct lu_env *env, struct inode *inode) struct obd_capa *cl_capa_lookup(struct inode *inode, enum cl_req_type crt); int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, - enum cl_fsync_mode mode); + enum cl_fsync_mode mode, int ignore_layout); /** direct write pages */ struct ll_dio_pages { diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 744ed25..e8e31bf 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -736,9 +736,11 @@ void ll_kill_super(struct super_block *sb) /* we need restore s_dev from changed for clustred NFS before put_super * because new kernels have cached s_dev and change sb->s_dev in * put_super not affected real removing devices */ - if (sbi) - sb->s_dev = sbi->ll_sdev_orig; - EXIT; + if (sbi) { + sb->s_dev 
= sbi->ll_sdev_orig; + sbi->ll_umounting = 1; + } + EXIT; } char *ll_read_opt(const char *opt, char *data) @@ -1919,7 +1921,8 @@ void ll_delete_inode(struct inode *inode) if (S_ISREG(inode->i_mode) && lli->lli_clob != NULL) /* discard all dirty pages before truncating them, required by * osc_extent implementation at LU-1030. */ - cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_DISCARD); + cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, + CL_FSYNC_DISCARD, 1); truncate_inode_pages(&inode->i_data, 0); @@ -2103,7 +2106,6 @@ void ll_umount_begin(struct super_block *sb) OBD_FREE_PTR(ioc_data); } - /* Really, we'd like to wait until there are no requests outstanding, * and then continue. For now, we just invalidate the requests, * schedule() and sleep one second if needed, and hope. diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 0cf7ab2..201f60d 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -1213,7 +1213,7 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc) * PageWriteback or clean the page. */ result = cl_sync_file_range(inode, offset, offset + CFS_PAGE_SIZE - 1, - CL_FSYNC_LOCAL); + CL_FSYNC_LOCAL, 1); if (result > 0) { /* actually we may have written more than one page. 
* decreasing this page because the caller will count @@ -1240,11 +1240,13 @@ out: int ll_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; + struct ll_sb_info *sbi = ll_i2sbi(inode); loff_t start; loff_t end; enum cl_fsync_mode mode; int range_whole = 0; int result; + int ignore_layout = 0; ENTRY; if (wbc->range_cyclic) { @@ -1263,7 +1265,12 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc) if (wbc->sync_mode == WB_SYNC_ALL) mode = CL_FSYNC_LOCAL; - result = cl_sync_file_range(inode, start, end, mode); + if (sbi->ll_umounting) + /* if the mountpoint is being umounted, all pages have to be + * evicted to avoid hitting LBUG when truncate_inode_pages() + * is called later on. */ + ignore_layout = 1; + result = cl_sync_file_range(inode, start, end, mode, ignore_layout); if (result > 0) { wbc->nr_to_write -= result; result = 0; diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index db53842..27f58fc 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -1214,8 +1214,19 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, /* Enqueue layout lock and get layout version. We need to do this * even for operations requiring to open file, such as read and write, * because it might not grant layout lock in IT_OPEN. */ - if (result == 0 && !io->ci_ignore_layout) + if (result == 0 && !io->ci_ignore_layout) { result = ll_layout_refresh(inode, &cio->cui_layout_gen); + if (result == -ENOENT) + /* If the inode on MDS has been removed, but the objects + * on OSTs haven't been destroyed (async unlink), layout + * fetch will return -ENOENT, we'd ignore this error + * and continue with dirty flush. LU-3230. */ + result = 0; + if (result < 0) + CERROR("%s: refresh file layout " DFID " error %d.\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(lu_object_fid(&obj->co_lu)), result); + } RETURN(result); } -- 1.8.3.1